mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-05 16:28:17 +00:00
Compare commits
1 Commits
Xe/fix-doc
...
Xe/allow-i
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ffa67fc46a |
12
.air.toml
12
.air.toml
@@ -1,12 +0,0 @@
|
||||
root = "."
|
||||
tmp_dir = "var"
|
||||
|
||||
[build]
|
||||
cmd = "go build -o ./var/main ./cmd/anubis"
|
||||
bin = "./var/main"
|
||||
args = ["--use-remote-address"]
|
||||
exclude_dir = ["var", "vendor", "docs", "node_modules"]
|
||||
|
||||
[logger]
|
||||
time = true
|
||||
# to change flags at runtime, prepend with -- e.g. $ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address
|
||||
@@ -1,12 +0,0 @@
|
||||
FROM ghcr.io/xe/devcontainer-base/pre/go
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY go.mod go.sum package.json package-lock.json ./
|
||||
RUN apt-get update \
|
||||
&& apt-get -y install zstd brotli redis \
|
||||
&& mkdir -p /home/vscode/.local/share/fish \
|
||||
&& chown -R vscode:vscode /home/vscode/.local/share/fish \
|
||||
&& chown -R vscode:vscode /go
|
||||
|
||||
CMD ["/usr/bin/sleep", "infinity"]
|
||||
@@ -1,13 +0,0 @@
|
||||
# Anubis Dev Container
|
||||
|
||||
Anubis offers a [development container](https://containers.dev/) image in order to make it easier to contribute to the project. This image is based on [Xe/devcontainer-base/go](https://github.com/Xe/devcontainer-base/tree/main/src/go), which is based on Debian Bookworm with the following customizations:
|
||||
|
||||
- [Fish](https://fishshell.com/) as the shell complete with a custom theme
|
||||
- [Go](https://go.dev) at the most recent stable version
|
||||
- [Node.js](https://nodejs.org/en) at the most recent stable version
|
||||
- [Atuin](https://atuin.sh/) to sync shell history between your host OS and the development container
|
||||
- [Docker](https://docker.com) to manage and build Anubis container images from inside the development container
|
||||
- [Ko](https://ko.build/) to build production-ready Anubis container images
|
||||
- [Neovim](https://neovim.io/) for use with Git
|
||||
|
||||
This development container is tested and known to work with [Visual Studio Code](https://code.visualstudio.com/). If you run into problems with it outside of VS Code, please file an issue and let us know what editor you are using.
|
||||
@@ -1,34 +0,0 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/debian
|
||||
{
|
||||
"name": "Dev",
|
||||
"dockerComposeFile": [
|
||||
"./docker-compose.yaml"
|
||||
],
|
||||
"service": "workspace",
|
||||
"workspaceFolder": "/workspace/anubis",
|
||||
"postStartCommand": "bash ./.devcontainer/poststart.sh",
|
||||
"features": {
|
||||
"ghcr.io/xe/devcontainer-features/ko:1.1.0": {},
|
||||
"ghcr.io/devcontainers/features/github-cli:1": {}
|
||||
},
|
||||
"initializeCommand": "mkdir -p ${localEnv:HOME}${localEnv:USERPROFILE}/.local/share/atuin",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"esbenp.prettier-vscode",
|
||||
"ms-azuretools.vscode-containers",
|
||||
"golang.go",
|
||||
"unifiedjs.vscode-mdx",
|
||||
"a-h.templ",
|
||||
"redhat.vscode-yaml",
|
||||
"streetsidesoftware.code-spell-checker"
|
||||
],
|
||||
"settings": {
|
||||
"chat.instructionsFilesLocations": {
|
||||
".github/copilot-instructions.md": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
services:
|
||||
playwright:
|
||||
image: mcr.microsoft.com/playwright:v1.52.0-noble
|
||||
init: true
|
||||
network_mode: service:workspace
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- npx -y playwright@1.52.0 run-server --port 9001 --host 0.0.0.0
|
||||
|
||||
valkey:
|
||||
image: valkey/valkey:8
|
||||
pull_policy: always
|
||||
|
||||
# VS Code workspace service
|
||||
workspace:
|
||||
image: ghcr.io/techarohq/anubis/devcontainer
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
volumes:
|
||||
- ../:/workspace/anubis:cached
|
||||
environment:
|
||||
VALKEY_URL: redis://valkey:6379/0
|
||||
#entrypoint: ["/usr/bin/sleep", "infinity"]
|
||||
user: vscode
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
pwd
|
||||
|
||||
npm ci &
|
||||
go mod download &
|
||||
go install ./utils/cmd/... &
|
||||
|
||||
wait
|
||||
1
.gitattributes
vendored
1
.gitattributes
vendored
@@ -1 +0,0 @@
|
||||
**/*_templ.go linguist-generated=true
|
||||
4
.github/FUNDING.yml
vendored
4
.github/FUNDING.yml
vendored
@@ -1,3 +1 @@
|
||||
patreon: cadey
|
||||
github: xe
|
||||
liberapay: Xe
|
||||
patreon: cadey
|
||||
61
.github/ISSUE_TEMPLATE/bug_report.yaml
vendored
61
.github/ISSUE_TEMPLATE/bug_report.yaml
vendored
@@ -1,61 +0,0 @@
|
||||
name: Bug report
|
||||
description: Create a report to help us improve
|
||||
|
||||
body:
|
||||
- type: textarea
|
||||
id: description-of-bug
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: A clear and concise description of what the bug is.
|
||||
placeholder: I can reliably get an error when...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: steps-to-reproduce
|
||||
attributes:
|
||||
label: Steps to reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior.
|
||||
placeholder: |
|
||||
1. Go to the following url...
|
||||
2. Click on...
|
||||
3. You get the following error: ...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: |
|
||||
A clear and concise description of what you expected to happen.
|
||||
Ideally also describe *why* you expect it to happen.
|
||||
placeholder: Instead of displaying an error, it would...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: version-os
|
||||
attributes:
|
||||
label: Your operating system and its version.
|
||||
description: Unsure? Visit https://whatsmyos.com/
|
||||
placeholder: Android 13
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: version-browser
|
||||
attributes:
|
||||
label: Your browser and its version.
|
||||
description: Unsure? Visit https://www.whatsmybrowser.org/
|
||||
placeholder: Firefox 142
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context about the problem here.
|
||||
|
||||
5
.github/ISSUE_TEMPLATE/config.yml
vendored
5
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,5 +0,0 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Security
|
||||
url: https://techaro.lol/contact
|
||||
about: Do not file security reports here. Email security@techaro.lol.
|
||||
39
.github/ISSUE_TEMPLATE/feature_request.yaml
vendored
39
.github/ISSUE_TEMPLATE/feature_request.yaml
vendored
@@ -1,39 +0,0 @@
|
||||
name: Feature request
|
||||
description: Suggest an idea for this project
|
||||
title: '[Feature request] '
|
||||
|
||||
body:
|
||||
- type: textarea
|
||||
id: description-of-bug
|
||||
attributes:
|
||||
label: Is your feature request related to a problem? Please describe.
|
||||
description: A clear and concise description of what the problem is that made you submit this report.
|
||||
placeholder: I am always frustrated, when...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: description-of-solution
|
||||
attributes:
|
||||
label: Solution you would like.
|
||||
description: A clear and concise description of what you want to happen.
|
||||
placeholder: Instead of behaving like this, there should be...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Describe alternatives you have considered.
|
||||
description: A clear and concise description of any alternative solutions or features you have considered.
|
||||
placeholder: Another workaround that would work, is...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context (such as mock-ups, proof of concepts or screenshots) about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
12
.github/PULL_REQUEST_TEMPLATE.md
vendored
12
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,12 +0,0 @@
|
||||
<!--
|
||||
delete me and describe your change here, give enough context for a maintainer to understand what and why
|
||||
|
||||
See https://anubis.techaro.lol/docs/developer/code-quality for more information
|
||||
-->
|
||||
|
||||
Checklist:
|
||||
|
||||
- [ ] Added a description of the changes to the `[Unreleased]` section of docs/docs/CHANGELOG.md
|
||||
- [ ] Added test cases to [the relevant parts of the codebase](https://anubis.techaro.lol/docs/developer/code-quality)
|
||||
- [ ] Ran integration tests `npm run test:integration` (unsupported on Windows, please use WSL)
|
||||
- [ ] All of my commits have [verified signatures](https://anubis.techaro.lol/docs/developer/signed-commits)
|
||||
17
.github/actions/spelling/README.md
vendored
17
.github/actions/spelling/README.md
vendored
@@ -1,17 +0,0 @@
|
||||
# check-spelling/check-spelling configuration
|
||||
|
||||
File | Purpose | Format | Info
|
||||
-|-|-|-
|
||||
[dictionary.txt](dictionary.txt) | Replacement dictionary (creating this file will override the default dictionary) | one word per line | [dictionary](https://github.com/check-spelling/check-spelling/wiki/Configuration#dictionary)
|
||||
[allow.txt](allow.txt) | Add words to the dictionary | one word per line (only letters and `'`s allowed) | [allow](https://github.com/check-spelling/check-spelling/wiki/Configuration#allow)
|
||||
[reject.txt](reject.txt) | Remove words from the dictionary (after allow) | grep pattern matching whole dictionary words | [reject](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-reject)
|
||||
[excludes.txt](excludes.txt) | Files to ignore entirely | perl regular expression | [excludes](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-excludes)
|
||||
[only.txt](only.txt) | Only check matching files (applied after excludes) | perl regular expression | [only](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-only)
|
||||
[patterns.txt](patterns.txt) | Patterns to ignore from checked lines | perl regular expression (order matters, first match wins) | [patterns](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-patterns)
|
||||
[candidate.patterns](candidate.patterns) | Patterns that might be worth adding to [patterns.txt](patterns.txt) | perl regular expression with optional comment block introductions (all matches will be suggested) | [candidates](https://github.com/check-spelling/check-spelling/wiki/Feature:-Suggest-patterns)
|
||||
[line_forbidden.patterns](line_forbidden.patterns) | Patterns to flag in checked lines | perl regular expression (order matters, first match wins) | [patterns](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-patterns)
|
||||
[expect.txt](expect.txt) | Expected words that aren't in the dictionary | one word per line (sorted, alphabetically) | [expect](https://github.com/check-spelling/check-spelling/wiki/Configuration#expect)
|
||||
[advice.md](advice.md) | Supplement for GitHub comment when unrecognized words are found | GitHub Markdown | [advice](https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-advice)
|
||||
|
||||
Note: you can replace any of these files with a directory by the same name (minus the suffix)
|
||||
and then include multiple files inside that directory (with that suffix) to merge multiple files together.
|
||||
31
.github/actions/spelling/advice.md
vendored
31
.github/actions/spelling/advice.md
vendored
@@ -1,31 +0,0 @@
|
||||
<!-- See https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples%3A-advice --> <!-- markdownlint-disable MD033 MD041 -->
|
||||
<details><summary>If the flagged items are :exploding_head: false positives</summary>
|
||||
|
||||
If items relate to a ...
|
||||
* binary file (or some other file you wouldn't want to check at all).
|
||||
|
||||
Please add a file path to the `excludes.txt` file matching the containing file.
|
||||
|
||||
File paths are Perl 5 Regular Expressions - you can [test](
|
||||
https://www.regexplanet.com/advanced/perl/) yours before committing to verify it will match your files.
|
||||
|
||||
`^` refers to the file's path from the root of the repository, so `^README\.md$` would exclude [README.md](
|
||||
../tree/HEAD/README.md) (on whichever branch you're using).
|
||||
|
||||
* well-formed pattern.
|
||||
|
||||
If you can write a [pattern](
|
||||
https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples:-patterns
|
||||
) that would match it,
|
||||
try adding it to the `patterns.txt` file.
|
||||
|
||||
Patterns are Perl 5 Regular Expressions - you can [test](
|
||||
https://www.regexplanet.com/advanced/perl/) yours before committing to verify it will match your lines.
|
||||
|
||||
Note that patterns can't match multiline strings.
|
||||
|
||||
</details>
|
||||
|
||||
<!-- adoption information-->
|
||||
:steam_locomotive: If you're seeing this message and your PR is from a branch that doesn't have check-spelling,
|
||||
please merge to your PR's base branch to get the version configured for your repository.
|
||||
10
.github/actions/spelling/allow.txt
vendored
10
.github/actions/spelling/allow.txt
vendored
@@ -1,10 +0,0 @@
|
||||
github
|
||||
https
|
||||
ssh
|
||||
ubuntu
|
||||
workarounds
|
||||
rjack
|
||||
msgbox
|
||||
xeact
|
||||
ABee
|
||||
tencent
|
||||
779
.github/actions/spelling/candidate.patterns
vendored
779
.github/actions/spelling/candidate.patterns
vendored
@@ -1,779 +0,0 @@
|
||||
# Repeated letters
|
||||
#\b([a-z])\g{-1}{2,}\b
|
||||
|
||||
# marker to ignore all code on line
|
||||
^.*/\* #no-spell-check-line \*/.*$
|
||||
# marker to ignore all code on line
|
||||
^.*\bno-spell-check(?:-line|)(?:\s.*|)$
|
||||
|
||||
# https://cspell.org/configuration/document-settings/
|
||||
# cspell inline
|
||||
^.*\b[Cc][Ss][Pp][Ee][Ll]{2}:\s*[Dd][Ii][Ss][Aa][Bb][Ll][Ee]-[Ll][Ii][Nn][Ee]\b
|
||||
|
||||
# copyright
|
||||
Copyright (?:\([Cc]\)|)(?:[-\d, ]|and)+(?: [A-Z][a-z]+ [A-Z][a-z]+,?)+
|
||||
|
||||
# patch hunk comments
|
||||
^@@ -\d+(?:,\d+|) \+\d+(?:,\d+|) @@ .*
|
||||
# git index header
|
||||
index (?:[0-9a-z]{7,40},|)[0-9a-z]{7,40}\.\.[0-9a-z]{7,40}
|
||||
|
||||
# file permissions
|
||||
['"`\s][-bcdLlpsw](?:[-r][-w][-Ssx]){2}[-r][-w][-SsTtx]\+?['"`\s]
|
||||
|
||||
# css fonts
|
||||
\bfont(?:-family|):[^;}]+
|
||||
|
||||
# css url wrappings
|
||||
\burl\([^)]+\)
|
||||
|
||||
# cid urls
|
||||
(['"])cid:.*?\g{-1}
|
||||
|
||||
# data url in parens
|
||||
\(data:(?:[^) ][^)]*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})[^)]*\)
|
||||
# data url in quotes
|
||||
([`'"])data:(?:[^ `'"].*?|)(?:[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,}).*\g{-1}
|
||||
# data url
|
||||
\bdata:[-a-zA-Z=;:/0-9+]*,\S*
|
||||
|
||||
# https/http/file urls
|
||||
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]
|
||||
|
||||
# mailto urls
|
||||
mailto:[-a-zA-Z=;:/?%&0-9+@._]{3,}
|
||||
|
||||
# magnet urls
|
||||
magnet:[?=:\w]+
|
||||
|
||||
# magnet urls
|
||||
"magnet:[^"]+"
|
||||
|
||||
# obs:
|
||||
"obs:[^"]*"
|
||||
|
||||
# The `\b` here means a break, it's the fancy way to handle urls, but it makes things harder to read
|
||||
# In this examples content, I'm using a number of different ways to match things to show various approaches
|
||||
# asciinema
|
||||
\basciinema\.org/a/[0-9a-zA-Z]+
|
||||
|
||||
# asciinema v2
|
||||
^\[\d+\.\d+, "[io]", ".*"\]$
|
||||
|
||||
# apple
|
||||
\bdeveloper\.apple\.com/[-\w?=/]+
|
||||
# Apple music
|
||||
\bembed\.music\.apple\.com/fr/playlist/usr-share/[-\w.]+
|
||||
|
||||
# appveyor api
|
||||
\bci\.appveyor\.com/api/projects/status/[0-9a-z]+
|
||||
# appveyor project
|
||||
\bci\.appveyor\.com/project/(?:[^/\s"]*/){2}builds?/\d+/job/[0-9a-z]+
|
||||
|
||||
# Amazon
|
||||
|
||||
# Amazon
|
||||
\bamazon\.com/[-\w]+/(?:dp/[0-9A-Z]+|)
|
||||
# AWS ARN
|
||||
arn:aws:[-/:\w]+
|
||||
# AWS S3
|
||||
\b\w*\.s3[^.]*\.amazonaws\.com/[-\w/&#%_?:=]*
|
||||
# AWS execute-api
|
||||
\b[0-9a-z]{10}\.execute-api\.[-0-9a-z]+\.amazonaws\.com\b
|
||||
# AWS ELB
|
||||
\b\w+\.[-0-9a-z]+\.elb\.amazonaws\.com\b
|
||||
# AWS SNS
|
||||
\bsns\.[-0-9a-z]+.amazonaws\.com/[-\w/&#%_?:=]*
|
||||
# AWS VPC
|
||||
vpc-\w+
|
||||
|
||||
# While you could try to match `http://` and `https://` by using `s?` in `https?://`, sometimes there
|
||||
# YouTube url
|
||||
\b(?:(?:www\.|)youtube\.com|youtu.be)/(?:channel/|embed/|user/|playlist\?list=|watch\?v=|v/|)[-a-zA-Z0-9?&=_%]*
|
||||
# YouTube music
|
||||
\bmusic\.youtube\.com/youtubei/v1/browse(?:[?&]\w+=[-a-zA-Z0-9?&=_]*)
|
||||
# YouTube tag
|
||||
<\s*youtube\s+id=['"][-a-zA-Z0-9?_]*['"]
|
||||
# YouTube image
|
||||
\bimg\.youtube\.com/vi/[-a-zA-Z0-9?&=_]*
|
||||
# Google Accounts
|
||||
\baccounts.google.com/[-_/?=.:;+%&0-9a-zA-Z]*
|
||||
# Google Analytics
|
||||
\bgoogle-analytics\.com/collect.[-0-9a-zA-Z?%=&_.~]*
|
||||
# Google APIs
|
||||
\bgoogleapis\.(?:com|dev)/[a-z]+/(?:v\d+/|)[a-z]+/[-@:./?=\w+|&]+
|
||||
# Google Artifact Registry
|
||||
\.pkg\.dev(?:/[-\w]+)+(?::[-\w]+|)
|
||||
# Google Storage
|
||||
\b[-a-zA-Z0-9.]*\bstorage\d*\.googleapis\.com(?:/\S*|)
|
||||
# Google Calendar
|
||||
\bcalendar\.google\.com/calendar(?:/u/\d+|)/embed\?src=[@./?=\w&%]+
|
||||
\w+\@group\.calendar\.google\.com\b
|
||||
# Google DataStudio
|
||||
\bdatastudio\.google\.com/(?:(?:c/|)u/\d+/|)(?:embed/|)(?:open|reporting|datasources|s)/[-0-9a-zA-Z]+(?:/page/[-0-9a-zA-Z]+|)
|
||||
# The leading `/` here is as opposed to the `\b` above
|
||||
# ... a short way to match `https://` or `http://` since most urls have one of those prefixes
|
||||
# Google Docs
|
||||
/docs\.google\.com/[a-z]+/(?:ccc\?key=\w+|(?:u/\d+|d/(?:e/|)[0-9a-zA-Z_-]+/)?(?:edit\?[-\w=#.]*|/\?[\w=&]*|))
|
||||
# Google Drive
|
||||
\bdrive\.google\.com/(?:file/d/|open)[-0-9a-zA-Z_?=]*
|
||||
# Google Groups
|
||||
\bgroups\.google\.com(?:/[a-z]+/(?:#!|)[^/\s"]+)*
|
||||
# Google Maps
|
||||
\bmaps\.google\.com/maps\?[\w&;=]*
|
||||
# Google themes
|
||||
themes\.googleusercontent\.com/static/fonts/[^/\s"]+/v\d+/[^.]+.
|
||||
# Google CDN
|
||||
\bclients2\.google(?:usercontent|)\.com[-0-9a-zA-Z/.]*
|
||||
# Goo.gl
|
||||
/goo\.gl/[a-zA-Z0-9]+
|
||||
# Google Chrome Store
|
||||
\bchrome\.google\.com/webstore/detail/[-\w]*(?:/\w*|)
|
||||
# Google Books
|
||||
\bgoogle\.(?:\w{2,4})/books(?:/\w+)*\?[-\w\d=&#.]*
|
||||
# Google Fonts
|
||||
\bfonts\.(?:googleapis|gstatic)\.com/[-/?=:;+&0-9a-zA-Z]*
|
||||
# Google Forms
|
||||
\bforms\.gle/\w+
|
||||
# Google Scholar
|
||||
\bscholar\.google\.com/citations\?user=[A-Za-z0-9_]+
|
||||
# Google Colab Research Drive
|
||||
\bcolab\.research\.google\.com/drive/[-0-9a-zA-Z_?=]*
|
||||
# Google Cloud regions
|
||||
(?:us|(?:north|south)america|europe|asia|australia|me|africa)-(?:north|south|east|west|central){1,2}\d+
|
||||
|
||||
# GitHub SHAs (api)
|
||||
\bapi.github\.com/repos(?:/[^/\s"]+){3}/[0-9a-f]+\b
|
||||
# GitHub SHAs (markdown)
|
||||
(?:\[`?[0-9a-f]+`?\]\(https:/|)/(?:www\.|)github\.com(?:/[^/\s"]+){2,}(?:/[^/\s")]+)(?:[0-9a-f]+(?:[-0-9a-zA-Z/#.]*|)\b|)
|
||||
# GitHub SHAs
|
||||
\bgithub\.com(?:/[^/\s"]+){2}[@#][0-9a-f]+\b
|
||||
# GitHub SHA refs
|
||||
\[([0-9a-f]+)\]\(https://(?:www\.|)github.com/[-\w]+/[-\w]+/commit/\g{-1}[0-9a-f]*
|
||||
# GitHub wiki
|
||||
\bgithub\.com/(?:[^/]+/){2}wiki/(?:(?:[^/]+/|)_history|[^/]+(?:/_compare|)/[0-9a-f.]{40,})\b
|
||||
# githubusercontent
|
||||
/[-a-z0-9]+\.githubusercontent\.com/[-a-zA-Z0-9?&=_\/.]*
|
||||
# githubassets
|
||||
\bgithubassets.com/[0-9a-f]+(?:[-/\w.]+)
|
||||
# gist github
|
||||
\bgist\.github\.com/[^/\s"]+/[0-9a-f]+
|
||||
# git.io
|
||||
\bgit\.io/[0-9a-zA-Z]+
|
||||
# GitHub JSON
|
||||
"node_id": "[-a-zA-Z=;:/0-9+_]*"
|
||||
# Contributor
|
||||
\[[^\]]+\]\(https://github\.com/[^/\s"]+/?\)
|
||||
# GHSA
|
||||
GHSA(?:-[0-9a-z]{4}){3}
|
||||
|
||||
# GitHub actions
|
||||
\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+
|
||||
|
||||
# GitLab commit
|
||||
\bgitlab\.[^/\s"]*/\S+/\S+/commit/[0-9a-f]{7,16}#[0-9a-f]{40}\b
|
||||
# GitLab merge requests
|
||||
\bgitlab\.[^/\s"]*/\S+/\S+/-/merge_requests/\d+/diffs#[0-9a-f]{40}\b
|
||||
# GitLab uploads
|
||||
\bgitlab\.[^/\s"]*/uploads/[-a-zA-Z=;:/0-9+]*
|
||||
# GitLab commits
|
||||
\bgitlab\.[^/\s"]*/(?:[^/\s"]+/){2}commits?/[0-9a-f]+\b
|
||||
|
||||
# #includes
|
||||
^\s*#include\s*(?:<.*?>|".*?")
|
||||
|
||||
# #pragma lib
|
||||
^\s*#pragma comment\(lib, ".*?"\)
|
||||
|
||||
# binance
|
||||
accounts\.binance\.com/[a-z/]*oauth/authorize\?[-0-9a-zA-Z&%]*
|
||||
|
||||
# bitbucket diff
|
||||
\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}diff(?:stat|)(?:/[^/\s"]+){2}:[0-9a-f]+
|
||||
# bitbucket repositories commits
|
||||
\bapi\.bitbucket\.org/\d+\.\d+/repositories/(?:[^/\s"]+/){2}commits?/[0-9a-f]+
|
||||
# bitbucket commits
|
||||
\bbitbucket\.org/(?:[^/\s"]+/){2}commits?/[0-9a-f]+
|
||||
|
||||
# bit.ly
|
||||
\bbit\.ly/\w+
|
||||
|
||||
# bitrise
|
||||
\bapp\.bitrise\.io/app/[0-9a-f]*/[\w.?=&]*
|
||||
|
||||
# bootstrapcdn.com
|
||||
\bbootstrapcdn\.com/[-./\w]+
|
||||
|
||||
# cdn.cloudflare.com
|
||||
\bcdnjs\.cloudflare\.com/[./\w]+
|
||||
|
||||
# circleci
|
||||
\bcircleci\.com/gh(?:/[^/\s"]+){1,5}.[a-z]+\?[-0-9a-zA-Z=&]+
|
||||
|
||||
# gitter
|
||||
\bgitter\.im(?:/[^/\s"]+){2}\?at=[0-9a-f]+
|
||||
|
||||
# gravatar
|
||||
\bgravatar\.com/avatar/[0-9a-f]+
|
||||
|
||||
# ibm
|
||||
[a-z.]*ibm\.com/[-_#=:%!?~.\\/\d\w]*
|
||||
|
||||
# imgur
|
||||
\bimgur\.com/[^.]+
|
||||
|
||||
# Internet Archive
|
||||
\barchive\.org/web/\d+/(?:[-\w.?,'/\\+&%$#_:]*)
|
||||
|
||||
# discord
|
||||
/discord(?:app\.com|\.gg)/(?:invite/)?[a-zA-Z0-9]{7,}
|
||||
|
||||
# Disqus
|
||||
\bdisqus\.com/[-\w/%.()!?&=_]*
|
||||
|
||||
# medium link
|
||||
\blink\.medium\.com/[a-zA-Z0-9]+
|
||||
# medium
|
||||
\bmedium\.com/@?[^/\s"]+/[-\w]+
|
||||
|
||||
# microsoft
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
# powerbi
|
||||
\bapp\.powerbi\.com/reportEmbed/[^"' ]*
|
||||
# vs devops
|
||||
\bvisualstudio.com(?::443|)/[-\w/?=%&.]*
|
||||
# microsoft store
|
||||
\bmicrosoft\.com/store/apps/\w+
|
||||
|
||||
# mvnrepository.com
|
||||
\bmvnrepository\.com/[-0-9a-z./]+
|
||||
|
||||
# now.sh
|
||||
/[0-9a-z-.]+\.now\.sh\b
|
||||
|
||||
# oracle
|
||||
\bdocs\.oracle\.com/[-0-9a-zA-Z./_?#&=]*
|
||||
|
||||
# chromatic.com
|
||||
/\S+.chromatic.com\S*[")]
|
||||
|
||||
# codacy
|
||||
\bapi\.codacy\.com/project/badge/Grade/[0-9a-f]+
|
||||
|
||||
# compai
|
||||
\bcompai\.pub/v1/png/[0-9a-f]+
|
||||
|
||||
# mailgun api
|
||||
\.api\.mailgun\.net/v3/domains/[0-9a-z]+\.mailgun.org/messages/[0-9a-zA-Z=@]*
|
||||
# mailgun
|
||||
\b[0-9a-z]+.mailgun.org
|
||||
|
||||
# /message-id/
|
||||
/message-id/[-\w@./%]+
|
||||
|
||||
# Reddit
|
||||
\breddit\.com/r/[/\w_]*
|
||||
|
||||
# requestb.in
|
||||
\brequestb\.in/[0-9a-z]+
|
||||
|
||||
# sched
|
||||
\b[a-z0-9]+\.sched\.com\b
|
||||
|
||||
# Slack url
|
||||
slack://[a-zA-Z0-9?&=]+
|
||||
# Slack
|
||||
\bslack\.com/[-0-9a-zA-Z/_~?&=.]*
|
||||
# Slack edge
|
||||
\bslack-edge\.com/[-a-zA-Z0-9?&=%./]+
|
||||
# Slack images
|
||||
\bslack-imgs\.com/[-a-zA-Z0-9?&=%.]+
|
||||
|
||||
# shields.io
|
||||
\bshields\.io/[-\w/%?=&.:+;,]*
|
||||
|
||||
# stackexchange -- https://stackexchange.com/feeds/sites
|
||||
\b(?:askubuntu|serverfault|stack(?:exchange|overflow)|superuser).com/(?:questions/\w+/[-\w]+|a/)
|
||||
|
||||
# Sentry
|
||||
[0-9a-f]{32}\@o\d+\.ingest\.sentry\.io\b
|
||||
|
||||
# Twitter markdown
|
||||
\[@[^[/\]:]*?\]\(https://twitter.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)\)
|
||||
# Twitter hashtag
|
||||
\btwitter\.com/hashtag/[\w?_=&]*
|
||||
# Twitter status
|
||||
\btwitter\.com/[^/\s"')]*(?:/status/\d+(?:\?[-_0-9a-zA-Z&=]*|)|)
|
||||
# Twitter profile images
|
||||
\btwimg\.com/profile_images/[_\w./]*
|
||||
# Twitter media
|
||||
\btwimg\.com/media/[-_\w./?=]*
|
||||
# Twitter link shortened
|
||||
\bt\.co/\w+
|
||||
|
||||
# facebook
|
||||
\bfburl\.com/[0-9a-z_]+
|
||||
# facebook CDN
|
||||
\bfbcdn\.net/[\w/.,]*
|
||||
# facebook watch
|
||||
\bfb\.watch/[0-9A-Za-z]+
|
||||
|
||||
# dropbox
|
||||
\bdropbox\.com/sh?/[^/\s"]+/[-0-9A-Za-z_.%?=&;]+
|
||||
|
||||
# ipfs protocol
|
||||
ipfs://[0-9a-zA-Z]{3,}
|
||||
# ipfs url
|
||||
/ipfs/[0-9a-zA-Z]{3,}
|
||||
|
||||
# w3
|
||||
\bw3\.org/[-0-9a-zA-Z/#.]+
|
||||
|
||||
# loom
|
||||
\bloom\.com/embed/[0-9a-f]+
|
||||
|
||||
# regex101
|
||||
\bregex101\.com/r/[^/\s"]+/\d+
|
||||
|
||||
# figma
|
||||
\bfigma\.com/file(?:/[0-9a-zA-Z]+/)+
|
||||
|
||||
# freecodecamp.org
|
||||
\bfreecodecamp\.org/[-\w/.]+
|
||||
|
||||
# image.tmdb.org
|
||||
\bimage\.tmdb\.org/[/\w.]+
|
||||
|
||||
# mermaid
|
||||
\bmermaid\.ink/img/[-\w]+|\bmermaid-js\.github\.io/mermaid-live-editor/#/edit/[-\w]+
|
||||
|
||||
# Wikipedia
|
||||
\ben\.wikipedia\.org/wiki/[-\w%.#]+
|
||||
|
||||
# gitweb
|
||||
[^"\s]+/gitweb/\S+;h=[0-9a-f]+
|
||||
|
||||
# HyperKitty lists
|
||||
/archives/list/[^@/]+@[^/\s"]*/message/[^/\s"]*/
|
||||
|
||||
# lists
|
||||
/thread\.html/[^"\s]+
|
||||
|
||||
# list-management
|
||||
\blist-manage\.com/subscribe(?:[?&](?:u|id)=[0-9a-f]+)+
|
||||
|
||||
# kubectl.kubernetes.io/last-applied-configuration
|
||||
"kubectl.kubernetes.io/last-applied-configuration": ".*"
|
||||
|
||||
# pgp
|
||||
\bgnupg\.net/pks/lookup[?&=0-9a-zA-Z]*
|
||||
|
||||
# Spotify
|
||||
\bopen\.spotify\.com/embed/playlist/\w+
|
||||
|
||||
# Mastodon
|
||||
\bmastodon\.[-a-z.]*/(?:media/|@)[?&=0-9a-zA-Z_]*
|
||||
|
||||
# scastie
|
||||
\bscastie\.scala-lang\.org/[^/]+/\w+
|
||||
|
||||
# images.unsplash.com
|
||||
\bimages\.unsplash\.com/(?:(?:flagged|reserve)/|)[-\w./%?=%&.;]+
|
||||
|
||||
# pastebin
|
||||
\bpastebin\.com/[\w/]+
|
||||
|
||||
# heroku
|
||||
\b\w+\.heroku\.com/source/archive/\w+
|
||||
|
||||
# quip
|
||||
\b\w+\.quip\.com/\w+(?:(?:#|/issues/)\w+)?
|
||||
|
||||
# badgen.net
|
||||
\bbadgen\.net/badge/[^")\]'\s]+
|
||||
|
||||
# statuspage.io
|
||||
\w+\.statuspage\.io\b
|
||||
|
||||
# media.giphy.com
|
||||
\bmedia\.giphy\.com/media/[^/]+/[\w.?&=]+
|
||||
|
||||
# tinyurl
|
||||
\btinyurl\.com/\w+
|
||||
|
||||
# codepen
|
||||
\bcodepen\.io/[\w/]+
|
||||
|
||||
# registry.npmjs.org
|
||||
\bregistry\.npmjs\.org/(?:@[^/"']+/|)[^/"']+/-/[-\w@.]+
|
||||
|
||||
# getopts
|
||||
\bgetopts\s+(?:"[^"]+"|'[^']+')
|
||||
|
||||
# ANSI color codes
|
||||
(?:\\(?:u00|x)1[Bb]|\\03[1-7]|\x1b|\\u\{1[Bb]\})\[\d+(?:;\d+)*m
|
||||
|
||||
# URL escaped characters
|
||||
%[0-9A-F][A-F](?=[A-Za-z])
|
||||
# lower URL escaped characters
|
||||
%[0-9a-f][a-f](?=[a-z]{2,})
|
||||
# IPv6
|
||||
\b(?:[0-9a-fA-F]{0,4}:){3,7}[0-9a-fA-F]{0,4}\b
|
||||
# c99 hex digits (not the full format, just one I've seen)
|
||||
0x[0-9a-fA-F](?:\.[0-9a-fA-F]*|)[pP]
|
||||
# Punycode
|
||||
\bxn--[-0-9a-z]+
|
||||
# sha
|
||||
sha\d+:[0-9a-f]*?[a-f]{3,}[0-9a-f]*
|
||||
# sha-... -- uses a fancy capture
|
||||
(\\?['"]|")[0-9a-f]{40,}\g{-1}
|
||||
# hex runs
|
||||
\b[0-9a-fA-F]{16,}\b
|
||||
# hex in url queries
|
||||
=[0-9a-fA-F]*?(?:[A-F]{3,}|[a-f]{3,})[0-9a-fA-F]*?&
|
||||
# ssh
|
||||
(?:ssh-\S+|-nistp256) [-a-zA-Z=;:/0-9+]{12,}
|
||||
|
||||
# PGP
|
||||
\b(?:[0-9A-F]{4} ){9}[0-9A-F]{4}\b
|
||||
# GPG keys
|
||||
\b(?:[0-9A-F]{4} ){5}(?: [0-9A-F]{4}){5}\b
|
||||
# Well known gpg keys
|
||||
.well-known/openpgpkey/[\w./]+
|
||||
|
||||
# pki
|
||||
-----BEGIN.*-----END
|
||||
|
||||
# pki (base64)
|
||||
LS0tLS1CRUdJT.*
|
||||
|
||||
# C# includes
|
||||
^\s*using [^;]+;
|
||||
|
||||
# uuid:
|
||||
\b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b
|
||||
# hex digits including css/html color classes:
|
||||
(?:[\\0][xX]|\\u|[uU]\+|#x?|%23|&H)[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|[iu]\d+)\b
|
||||
|
||||
# integrity
|
||||
integrity=(['"])(?:\s*sha\d+-[-a-zA-Z=;:/0-9+]{40,})+\g{-1}
|
||||
|
||||
# https://www.gnu.org/software/groff/manual/groff.html
|
||||
# man troff content
|
||||
\\f[BCIPR]
|
||||
# '/"
|
||||
\\\([ad]q
|
||||
|
||||
# .desktop mime types
|
||||
^MimeTypes?=.*$
|
||||
# .desktop localized entries
|
||||
^[A-Z][a-z]+\[[a-z]+\]=.*$
|
||||
# Localized .desktop content
|
||||
Name\[[^\]]+\]=.*
|
||||
|
||||
# IServiceProvider / isAThing
|
||||
(?:(?:\b|_|(?<=[a-z]))I|(?:\b|_)(?:nsI|isA))(?=(?:[A-Z][a-z]{2,})+(?:[A-Z\d]|\b))
|
||||
|
||||
# crypt
|
||||
(['"])\$2[ayb]\$.{56}\g{-1}
|
||||
|
||||
# apache/old crypt
|
||||
(['"]|)\$+(?:apr|)1\$+.{8}\$+.{22}\g{-1}
|
||||
|
||||
# sha1 hash
|
||||
\{SHA\}[-a-zA-Z=;:/0-9+]{3,}
|
||||
|
||||
# machine learning (?)
|
||||
\b(?i)ml(?=[a-z]{2,})
|
||||
|
||||
# python
|
||||
#\b(?i)py(?!gments|gmy|lon|ramid|ro|th)(?=[a-z]{2,})
|
||||
|
||||
# scrypt / argon
|
||||
\$(?:scrypt|argon\d+[di]*)\$\S+
|
||||
|
||||
# go.sum
|
||||
\bh1:\S+
|
||||
|
||||
# imports
|
||||
^import\s+(?:(?:static|type)\s+|)(?:[\w.]|\{\s*\w*?(?:,\s*(?:\w*|\*))+\s*\})+
|
||||
|
||||
# scala modules
|
||||
("[^"]+"\s*%%?\s*){2,3}"[^"]+"
|
||||
|
||||
# container images
|
||||
image: [-\w./:@]+
|
||||
|
||||
# Docker images
|
||||
^\s*(?i)FROM\s+\S+:\S+(?:\s+AS\s+\S+|)
|
||||
|
||||
# `docker images` REPOSITORY TAG IMAGE ID CREATED SIZE
|
||||
\s*\S+/\S+\s+\S+\s+[0-9a-f]{8,}\s+\d+\s+(?:hour|day|week)s ago\s+[\d.]+[KMGT]B
|
||||
|
||||
# Intel intrinsics
|
||||
_mm_(?!dd)\w+
|
||||
|
||||
# Input to GitHub JSON
|
||||
content: (['"])[-a-zA-Z=;:/0-9+]*=\g{-1}
|
||||
|
||||
# This does not cover multiline strings, if your repository has them,
|
||||
# you'll want to remove the `(?=.*?")` suffix.
|
||||
# The `(?=.*?")` suffix should limit the false positives rate
|
||||
# printf
|
||||
%(?:(?:(?:hh?|ll?|[jzt])?[diuoxn]|l?[cs]|L?[fega]|p)(?=[a-z]{2,})|(?:X|L?[FEGA])(?=[a-zA-Z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%)\b)(?=.*?['"])
|
||||
|
||||
# Alternative printf
|
||||
# %s
|
||||
%(?:s(?=[a-z]{2,}))(?!%)(?=[_a-zA-Z]+(?!%[^s])\b)(?=.*?['"])
|
||||
|
||||
# Python string prefix / binary prefix
|
||||
# Note that there's a high false positive rate, remove the `?=` and search for the regex to see if the matches seem like reasonable strings
|
||||
(?<!['"])\b(?:B|BR|Br|F|FR|Fr|R|RB|RF|Rb|Rf|U|UR|Ur|b|bR|br|f|fR|fr|r|rB|rF|rb|rf|u|uR|ur)['"](?=[A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})
|
||||
|
||||
# Regular expressions for (P|p)assword
|
||||
\([A-Z]\|[a-z]\)[a-z]+
|
||||
|
||||
# JavaScript regular expressions
|
||||
# javascript test regex
|
||||
/.{3,}/[gim]*\.test\(
|
||||
# javascript match regex
|
||||
\.match\(/[^/\s"]{3,}/[gim]*\s*
|
||||
# javascript match regex
|
||||
\.match\(/\\[b].{3,}?/[gim]*\s*\)(?:;|$)
|
||||
# javascript regex
|
||||
^\s*/\\[b].{3,}?/[gim]*\s*(?:\)(?:;|$)|,$)
|
||||
# javascript replace regex
|
||||
\.replace\(/[^/\s"]{3,}/[gim]*\s*,
|
||||
# assign regex
|
||||
= /[^*].*?(?:[a-z]{3,}|[A-Z]{3,}|[A-Z][a-z]{2,}).*/[gim]*(?=\W|$)
|
||||
# perl regex test
|
||||
[!=]~ (?:/.*/|m\{.*?\}|m<.*?>|m([|!/@#,;']).*?\g{-1})
|
||||
|
||||
# perl qr regex
|
||||
(?<!\$)\bqr(?:\{.*?\}|<.*?>|\(.*?\)|([|!/@#,;']).*?\g{-1})
|
||||
|
||||
# perl run
|
||||
perl(?:\s+-[a-zA-Z]\w*)+
|
||||
|
||||
# C network byte conversions
|
||||
(?:\d|\bh)to(?!ken)(?=[a-z])|to(?=[adhiklpun]\()
|
||||
|
||||
# Go regular expressions
|
||||
regexp?\.MustCompile\((?:`[^`]*`|".*"|'.*')\)
|
||||
|
||||
# regex choice
|
||||
\(\?:[^)]+\|[^)]+\)
|
||||
|
||||
# proto
|
||||
^\s*(\w+)\s\g{-1} =
|
||||
|
||||
# sed regular expressions
|
||||
sed 's/(?:[^/]*?[a-zA-Z]{3,}[^/]*?/){2}
|
||||
|
||||
# node packages
|
||||
(["'])@[^/'" ]+/[^/'" ]+\g{-1}
|
||||
|
||||
# go install
|
||||
go install(?:\s+[a-z]+\.[-@\w/.]+)+
|
||||
|
||||
# pom.xml
|
||||
<(?:group|artifact)Id>.*?<
|
||||
|
||||
# jetbrains schema https://youtrack.jetbrains.com/issue/RSRP-489571
|
||||
urn:shemas-jetbrains-com
|
||||
|
||||
# Debian changelog severity
|
||||
[-\w]+ \(.*\) (?:\w+|baseline|unstable|experimental); urgency=(?:low|medium|high|emergency|critical)\b
|
||||
|
||||
# kubernetes pod status lists
|
||||
# https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
|
||||
\w+(?:-\w+)+\s+\d+/\d+\s+(?:Running|Pending|Succeeded|Failed|Unknown)\s+
|
||||
|
||||
# kubectl - pods in CrashLoopBackOff
|
||||
\w+-[0-9a-f]+-\w+\s+\d+/\d+\s+CrashLoopBackOff\s+
|
||||
|
||||
# kubernetes applications
|
||||
\.apps/[-\w]+
|
||||
|
||||
# kubernetes object suffix
|
||||
-[0-9a-f]{10}-\w{5}\s
|
||||
|
||||
# kubernetes crd patterns
|
||||
^\s*pattern: .*$
|
||||
|
||||
# posthog secrets
|
||||
([`'"])phc_[^"',]+\g{-1}
|
||||
|
||||
# xcode
|
||||
|
||||
# xcodeproject scenes
|
||||
(?:Controller|destination|(?:first|second)Item|ID|id)="\w{3}-\w{2}-\w{3}"
|
||||
|
||||
# xcode api botches
|
||||
customObjectInstantitationMethod
|
||||
|
||||
# msvc api botches
|
||||
PrependWithABINamepsace
|
||||
|
||||
# configure flags
|
||||
.* \| --\w{2,}.*?(?=\w+\s\w+)
|
||||
|
||||
# font awesome classes
|
||||
\.fa-[-a-z0-9]+
|
||||
|
||||
# bearer auth
|
||||
(['"])[Bb]ear[e][r] .{3,}?\g{-1}
|
||||
|
||||
# bearer auth
|
||||
\b[Bb]ear[e][r]:? [-a-zA-Z=;:/0-9+.]{3,}
|
||||
|
||||
# basic auth
|
||||
(['"])[Bb]asic [-a-zA-Z=;:/0-9+]{3,}\g{-1}
|
||||
|
||||
# basic auth
|
||||
: [Bb]asic [-a-zA-Z=;:/0-9+.]{3,}
|
||||
|
||||
# base64 encoded content
|
||||
([`'"])[-a-zA-Z=;:/0-9+]{3,}=\g{-1}
|
||||
# base64 encoded content in xml/sgml
|
||||
>[-a-zA-Z=;:/0-9+]{3,}=</
|
||||
# base64 encoded content, possibly wrapped in mime
|
||||
#(?:^|[\s=;:?])[-a-zA-Z=;:/0-9+]{50,}(?:[\s=;:?]|$)
|
||||
# base64 encoded json
|
||||
\beyJ[-a-zA-Z=;:/0-9+]+
|
||||
# base64 encoded pkcs
|
||||
\bMII[-a-zA-Z=;:/0-9+]+
|
||||
|
||||
# uuencoded
|
||||
#[!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_]{40,}
|
||||
|
||||
# DNS rr data
|
||||
(?:\d+\s+){3}(?:[-+/=.\w]{2,}\s*){1,2}
|
||||
|
||||
# encoded-word
|
||||
=\?[-a-zA-Z0-9"*%]+\?[BQ]\?[^?]{0,75}\?=
|
||||
|
||||
# numerator
|
||||
\bnumer\b(?=.*denom)
|
||||
|
||||
# Time Zones
|
||||
\b(?:Africa|Atlantic|America|Antarctica|Arctic|Asia|Australia|Europe|Indian|Pacific)(?:/[-\w]+)+
|
||||
|
||||
# linux kernel info
|
||||
^(?:bugs|flags|Features)\s+:.*
|
||||
|
||||
# systemd mode
|
||||
systemd.*?running in system mode \([-+].*\)$
|
||||
|
||||
# Lorem
|
||||
# Update Lorem based on your content (requires `ge` and `w` from https://github.com/jsoref/spelling; and `review` from https://github.com/check-spelling/check-spelling/wiki/Looking-for-items-locally )
|
||||
# grep '^[^#].*lorem' .github/actions/spelling/patterns.txt|perl -pne 's/.*i..\?://;s/\).*//' |tr '|' "\n"|sort -f |xargs -n1 ge|perl -pne 's/^[^:]*://'|sort -u|w|sed -e 's/ .*//'|w|review -
|
||||
# Warning, while `(?i)` is very neat and fancy, if you have some binary files that aren't proper unicode, you might run into:
|
||||
# ... Operation "substitution (s///)" returns its argument for non-Unicode code point 0x1C19AE (the code point will vary).
|
||||
# ... You could manually change `(?i)X...` to use `[Xx]...`
|
||||
# ... or you could add the files to your `excludes` file (a version after 0.0.19 should identify the file path)
|
||||
(?:(?:\w|\s|[,.])*\b(?i)(?:amet|consectetur|cursus|dolor|eros|ipsum|lacus|libero|ligula|lorem|magna|neque|nulla|suscipit|tempus)\b(?:\w|\s|[,.])*)
|
||||
|
||||
# Non-English
|
||||
# Even repositories expecting pure English content can unintentionally have Non-English content... People will occasionally mistakenly enter [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) which are essentially typos, and using this pattern will mean check-spelling will not complain about them.
|
||||
#
|
||||
# If the content to be checked should be written in English and the only Non-English items will be people's names, then you can consider adding this.
|
||||
#
|
||||
# Alternatively, if you're using check-spelling v0.0.25+, and you would like to _check_ the Non-English content for spelling errors, you can. For information on how to do so, see:
|
||||
# https://docs.check-spelling.dev/Feature:-Configurable-word-characters.html#unicode
|
||||
[a-zA-Z]*[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3}[a-zA-ZÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]*|[a-zA-Z]{3,}[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź]|[ÀÁÂÃÄÅÆČÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæčçèéêëìíîïðñòóôõöøùúûüýÿĀāŁłŃńŅņŒœŚśŠšŜŝŸŽžź][a-zA-Z]{3,}
|
||||
|
||||
# highlighted letters
|
||||
\[[A-Z]\][a-z]+
|
||||
|
||||
# French
|
||||
# This corpus only had capital letters, but you probably want lowercase ones as well.
|
||||
\b[LN]'+[a-z]{2,}\b
|
||||
|
||||
# latex (check-spelling >= 0.0.22)
|
||||
\\\w{2,}\{
|
||||
|
||||
# American Mathematical Society (AMS) / Doxygen
|
||||
TeX/AMS
|
||||
|
||||
# File extensions
|
||||
\*\.[+\w]+,
|
||||
|
||||
# eslint
|
||||
"varsIgnorePattern": ".+"
|
||||
|
||||
# nolint
|
||||
nolint:\s*[\w,]+
|
||||
|
||||
# Windows short paths
|
||||
[/\\][^/\\]{5,6}~\d{1,2}(?=[/\\])
|
||||
|
||||
# Windows Resources with accelerators
|
||||
\b[A-Z]&[a-z]+\b(?!;)
|
||||
|
||||
# signed off by
|
||||
(?i)Signed-off-by: .*
|
||||
|
||||
# cygwin paths
|
||||
/cygdrive/[a-zA-Z]/(?:Program Files(?: \(.*?\)| ?)(?:/[-+.~\\/()\w ]+)*|[-+.~\\/()\w])+
|
||||
|
||||
# in check-spelling@v0.0.22+, printf markers aren't automatically consumed
|
||||
# printf markers
|
||||
(?<!\\)\\[nrt](?=[a-z]{2,})
|
||||
# alternate printf markers if you run into latex and friends
|
||||
(?<!\\)\\[nrt](?=[a-z]{2,})(?=.*['"`])
|
||||
|
||||
# Markdown anchor links
|
||||
\(#\S*?[a-zA-Z]\S*?\)
|
||||
|
||||
# apache
|
||||
a2(?:en|dis)
|
||||
|
||||
# weak e-tag
|
||||
W/"[^"]+"
|
||||
|
||||
# authors/credits
|
||||
^\*(?: [A-Z](?:\w+|\.)){2,} (?=\[|$)
|
||||
|
||||
# the negative lookahead here is to allow catching 'templatesz' as a misspelling
|
||||
# but to otherwise recognize a Windows path with \templates\foo.template or similar:
|
||||
\\(?:necessary|r(?:elease|eport|esolve[dr]?|esult)|t(?:arget|emplates?))(?![a-z])
|
||||
# ignore long runs of a single character:
|
||||
\b([A-Za-z])\g{-1}{3,}\b
|
||||
|
||||
# version suffix <word>v#
|
||||
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))
|
||||
|
||||
# Compiler flags (Unix, Java/Scala)
|
||||
# Use if you have things like `-Pdocker` and want to treat them as `docker`
|
||||
#(?:^|[\t ,>"'`=(#])-(?:(?:J-|)[DPWXY]|[Llf])(?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,})
|
||||
|
||||
# Compiler flags (Windows / PowerShell)
|
||||
# This is a subset of the more general compiler flags pattern.
|
||||
# It avoids matching `-Path` to prevent it from being treated as `ath`
|
||||
#(?:^|[\t ,"'`=(#])-(?:[DPL](?=[A-Z]{2,})|[WXYlf](?=[A-Z]{2,}|[A-Z][a-z]|[a-z]{2,}))
|
||||
|
||||
# Compiler flags (linker)
|
||||
,-B
|
||||
|
||||
# libraries
|
||||
(?:\b|_)[Ll]ib(?:re(?=office)|)(?!era[lt]|ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])
|
||||
|
||||
# WWNN/WWPN (NAA identifiers)
|
||||
\b(?:0x)?10[0-9a-f]{14}\b|\b(?:0x|3)?[25][0-9a-f]{15}\b|\b(?:0x|3)?6[0-9a-f]{31}\b
|
||||
|
||||
# iSCSI iqn (approximate regex)
|
||||
\biqn\.[0-9]{4}-[0-9]{2}(?:[\.-][a-z][a-z0-9]*)*\b
|
||||
|
||||
# curl arguments
|
||||
\b(?:\\n|)curl(?:\.exe|)(?:\s+-[a-zA-Z]{1,2}\b)*(?:\s+-[a-zA-Z]{3,})(?:\s+-[a-zA-Z]+)*
|
||||
# set arguments
|
||||
\b(?:bash|sh|set)(?:\s+[-+][abefimouxE]{1,2})*\s+[-+][abefimouxE]{3,}(?:\s+[-+][abefimouxE]+)*
|
||||
# tar arguments
|
||||
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+
|
||||
# tput arguments -- https://man7.org/linux/man-pages/man5/terminfo.5.html -- technically they can be more than 5 chars long...
|
||||
\btput\s+(?:(?:-[SV]|-T\s*\w+)\s+)*\w{3,5}\b
|
||||
# macOS temp folders
|
||||
/var/folders/\w\w/[+\w]+/(?:T|-Caches-)/
|
||||
# github runner temp folders
|
||||
/home/runner/work/_temp/[-_/a-z0-9]+
|
||||
96
.github/actions/spelling/excludes.txt
vendored
96
.github/actions/spelling/excludes.txt
vendored
@@ -1,96 +0,0 @@
|
||||
# See https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples:-excludes
|
||||
(?:^|/)(?i)COPYRIGHT
|
||||
(?:^|/)(?i)LICEN[CS]E
|
||||
(?:^|/)(?i)third[-_]?party/
|
||||
(?:^|/)3rdparty/
|
||||
(?:^|/)generated/
|
||||
(?:^|/)go\.sum$
|
||||
(?:^|/)package(?:-lock|)\.json$
|
||||
(?:^|/)Pipfile$
|
||||
(?:^|/)pyproject.toml
|
||||
(?:^|/)vendor/
|
||||
(?:^|/|\b)requirements(?:-dev|-doc|-test|)\.txt$
|
||||
\.a$
|
||||
\.ai$
|
||||
\.all-contributorsrc$
|
||||
\.avi$
|
||||
\.bmp$
|
||||
\.bz2$
|
||||
\.cert?$|\.crt$
|
||||
\.class$
|
||||
\.coveragerc$
|
||||
\.crl$
|
||||
\.csr$
|
||||
\.dll$
|
||||
\.docx?$
|
||||
\.drawio$
|
||||
\.DS_Store$
|
||||
\.eot$
|
||||
\.eps$
|
||||
\.exe$
|
||||
\.gif$
|
||||
\.git-blame-ignore-revs$
|
||||
\.gitattributes$
|
||||
\.gitkeep$
|
||||
\.graffle$
|
||||
\.gz$
|
||||
\.icns$
|
||||
\.ico$
|
||||
\.ipynb$
|
||||
\.jar$
|
||||
\.jks$
|
||||
\.jpe?g$
|
||||
\.key$
|
||||
\.lib$
|
||||
\.lock$
|
||||
\.map$
|
||||
\.min\..
|
||||
\.mo$
|
||||
\.mod$
|
||||
\.mp[34]$
|
||||
\.o$
|
||||
\.ocf$
|
||||
\.otf$
|
||||
\.p12$
|
||||
\.parquet$
|
||||
\.pdf$
|
||||
\.pem$
|
||||
\.pfx$
|
||||
\.png$
|
||||
\.psd$
|
||||
\.pyc$
|
||||
\.pylintrc$
|
||||
\.qm$
|
||||
\.s$
|
||||
\.sig$
|
||||
\.so$
|
||||
\.svgz?$
|
||||
\.sys$
|
||||
\.tar$
|
||||
\.tgz$
|
||||
\.tiff?$
|
||||
\.ttf$
|
||||
\.wav$
|
||||
\.webm$
|
||||
\.webp$
|
||||
\.woff2?$
|
||||
\.xcf$
|
||||
\.xlsx?$
|
||||
\.xpm$
|
||||
\.xz$
|
||||
\.zip$
|
||||
^\.github/actions/spelling/
|
||||
^\Q.github/FUNDING.yml\E$
|
||||
^\Q.github/workflows/spelling.yml\E$
|
||||
^data/crawlers/
|
||||
^docs/blog/tags\.yml$
|
||||
^docs/docs/user/known-instances.md$
|
||||
^docs/manifest/.*$
|
||||
^docs/static/\.nojekyll$
|
||||
^lib/policy/config/testdata/bad/unparseable\.json$
|
||||
^internal/glob/glob_test.go$
|
||||
ignore$
|
||||
robots.txt
|
||||
^lib/localization/locales/.*\.json$
|
||||
^lib/localization/.*_test.go$
|
||||
^test/.*$
|
||||
385
.github/actions/spelling/expect.txt
vendored
385
.github/actions/spelling/expect.txt
vendored
@@ -1,385 +0,0 @@
|
||||
acs
|
||||
Actorified
|
||||
actorifiedstore
|
||||
actorify
|
||||
Aibrew
|
||||
alibaba
|
||||
alrest
|
||||
amazonbot
|
||||
anthro
|
||||
anubis
|
||||
anubistest
|
||||
apnic
|
||||
APNICRANDNETAU
|
||||
Applebot
|
||||
archlinux
|
||||
asnc
|
||||
asnchecker
|
||||
asns
|
||||
aspirational
|
||||
atuin
|
||||
azuretools
|
||||
badregexes
|
||||
bbolt
|
||||
bdba
|
||||
berr
|
||||
bezier
|
||||
bingbot
|
||||
Bitcoin
|
||||
bitrate
|
||||
Bluesky
|
||||
blueskybot
|
||||
boi
|
||||
Bokm
|
||||
botnet
|
||||
botstopper
|
||||
BPort
|
||||
Brightbot
|
||||
broked
|
||||
buildah
|
||||
byteslice
|
||||
Bytespider
|
||||
cachebuster
|
||||
cachediptoasn
|
||||
Caddyfile
|
||||
caninetools
|
||||
Cardyb
|
||||
celchecker
|
||||
celphase
|
||||
cerr
|
||||
certresolver
|
||||
cespare
|
||||
CGNAT
|
||||
cgr
|
||||
chainguard
|
||||
chall
|
||||
challengemozilla
|
||||
challengetest
|
||||
checkpath
|
||||
checkresult
|
||||
chibi
|
||||
cidranger
|
||||
ckie
|
||||
cloudflare
|
||||
Codespaces
|
||||
confd
|
||||
connnection
|
||||
containerbuild
|
||||
containerregistry
|
||||
coreutils
|
||||
Cotoyogi
|
||||
Cromite
|
||||
crt
|
||||
Cscript
|
||||
daemonizing
|
||||
dayjob
|
||||
DDOS
|
||||
Debian
|
||||
debrpm
|
||||
decaymap
|
||||
devcontainers
|
||||
Diffbot
|
||||
discordapp
|
||||
discordbot
|
||||
distros
|
||||
dnf
|
||||
dnsbl
|
||||
dnserr
|
||||
domainhere
|
||||
dracula
|
||||
dronebl
|
||||
droneblresponse
|
||||
dropin
|
||||
dsilence
|
||||
duckduckbot
|
||||
eerror
|
||||
ellenjoe
|
||||
emacs
|
||||
enbyware
|
||||
etld
|
||||
everyones
|
||||
evilbot
|
||||
evilsite
|
||||
expressionorlist
|
||||
externalagent
|
||||
externalfetcher
|
||||
extldflags
|
||||
facebookgo
|
||||
Factset
|
||||
fastcgi
|
||||
fediverse
|
||||
ffprobe
|
||||
financials
|
||||
finfos
|
||||
Firecrawl
|
||||
flagenv
|
||||
Fordola
|
||||
forgejo
|
||||
forwardauth
|
||||
fsys
|
||||
fullchain
|
||||
gaissmai
|
||||
Galvus
|
||||
geoip
|
||||
geoipchecker
|
||||
gha
|
||||
GHSA
|
||||
Ghz
|
||||
gipc
|
||||
gitea
|
||||
godotenv
|
||||
goland
|
||||
gomod
|
||||
goodbot
|
||||
googlebot
|
||||
gopsutil
|
||||
govulncheck
|
||||
goyaml
|
||||
GPG
|
||||
GPT
|
||||
gptbot
|
||||
Graphene
|
||||
grpcprom
|
||||
grw
|
||||
gzw
|
||||
Hashcash
|
||||
hashrate
|
||||
headermap
|
||||
healthcheck
|
||||
healthz
|
||||
hec
|
||||
helpdesk
|
||||
Hetzner
|
||||
hmc
|
||||
homelab
|
||||
hostable
|
||||
htmlc
|
||||
htmx
|
||||
httpdebug
|
||||
Huawei
|
||||
huawei
|
||||
hypertext
|
||||
iaskspider
|
||||
iaso
|
||||
iat
|
||||
ifm
|
||||
Imagesift
|
||||
imgproxy
|
||||
impressum
|
||||
inbox
|
||||
ingressed
|
||||
inp
|
||||
internets
|
||||
IPTo
|
||||
iptoasn
|
||||
isp
|
||||
iss
|
||||
isset
|
||||
ivh
|
||||
Jenomis
|
||||
JGit
|
||||
jhjj
|
||||
joho
|
||||
journalctl
|
||||
jshelter
|
||||
JWTs
|
||||
kagi
|
||||
kagibot
|
||||
Keyfunc
|
||||
keypair
|
||||
KHTML
|
||||
kinda
|
||||
KUBECONFIG
|
||||
lcj
|
||||
ldflags
|
||||
letsencrypt
|
||||
Lexentale
|
||||
lgbt
|
||||
licend
|
||||
licstart
|
||||
lightpanda
|
||||
limsa
|
||||
Linting
|
||||
LLU
|
||||
loadbalancer
|
||||
lol
|
||||
lominsa
|
||||
maintainership
|
||||
malware
|
||||
mcr
|
||||
memes
|
||||
metarefresh
|
||||
metrix
|
||||
mimi
|
||||
Minfilia
|
||||
mistralai
|
||||
mnt
|
||||
Mojeek
|
||||
mojeekbot
|
||||
mozilla
|
||||
nbf
|
||||
nepeat
|
||||
netsurf
|
||||
nginx
|
||||
nicksnyder
|
||||
nobots
|
||||
NONINFRINGEMENT
|
||||
nosleep
|
||||
nullglob
|
||||
oci
|
||||
OCOB
|
||||
ogtag
|
||||
oklch
|
||||
omgili
|
||||
omgilibot
|
||||
openai
|
||||
opengraph
|
||||
openrc
|
||||
oswald
|
||||
pag
|
||||
palemoon
|
||||
Pangu
|
||||
parseable
|
||||
passthrough
|
||||
Patreon
|
||||
pgrep
|
||||
phrik
|
||||
pidfile
|
||||
pids
|
||||
pipefail
|
||||
pki
|
||||
podkova
|
||||
podman
|
||||
Postgre
|
||||
poststart
|
||||
prebaked
|
||||
privkey
|
||||
promauto
|
||||
promhttp
|
||||
proofofwork
|
||||
publicsuffix
|
||||
purejs
|
||||
pwcmd
|
||||
pwuser
|
||||
qualys
|
||||
qwant
|
||||
qwantbot
|
||||
rac
|
||||
rawler
|
||||
rcvar
|
||||
rdb
|
||||
redhat
|
||||
redir
|
||||
redirectscheme
|
||||
refactors
|
||||
reputational
|
||||
risc
|
||||
ruleset
|
||||
runlevels
|
||||
RUnlock
|
||||
runtimedir
|
||||
runtimedirectory
|
||||
Ryzen
|
||||
sas
|
||||
sasl
|
||||
screenshots
|
||||
searchbot
|
||||
searx
|
||||
sebest
|
||||
secretplans
|
||||
Semrush
|
||||
Seo
|
||||
setsebool
|
||||
shellcheck
|
||||
shirou
|
||||
shopt
|
||||
Sidetrade
|
||||
simprint
|
||||
sitemap
|
||||
sls
|
||||
sni
|
||||
Spambot
|
||||
sparkline
|
||||
spyderbot
|
||||
srv
|
||||
stackoverflow
|
||||
startprecmd
|
||||
stoppostcmd
|
||||
storetest
|
||||
subgrid
|
||||
subr
|
||||
subrequest
|
||||
SVCNAME
|
||||
tagline
|
||||
tarballs
|
||||
tarrif
|
||||
taviso
|
||||
tbn
|
||||
tbr
|
||||
techaro
|
||||
techarohq
|
||||
templ
|
||||
templruntime
|
||||
testarea
|
||||
Thancred
|
||||
thoth
|
||||
thothmock
|
||||
Tik
|
||||
Timpibot
|
||||
TLog
|
||||
traefik
|
||||
trunc
|
||||
uberspace
|
||||
Unbreak
|
||||
unbreakdocker
|
||||
unifiedjs
|
||||
unmarshal
|
||||
unparseable
|
||||
uvx
|
||||
UXP
|
||||
valkey
|
||||
Varis
|
||||
Velen
|
||||
vendored
|
||||
vhosts
|
||||
VKE
|
||||
vnd
|
||||
VPS
|
||||
Vultr
|
||||
weblate
|
||||
webmaster
|
||||
webpage
|
||||
websecure
|
||||
websites
|
||||
Webzio
|
||||
whois
|
||||
wildbase
|
||||
withthothmock
|
||||
wolfbeast
|
||||
wordpress
|
||||
Workaround
|
||||
workaround
|
||||
workdir
|
||||
wpbot
|
||||
XCircle
|
||||
xeiaso
|
||||
xeserv
|
||||
xesite
|
||||
xess
|
||||
xff
|
||||
XForwarded
|
||||
XNG
|
||||
XOB
|
||||
XOriginal
|
||||
XReal
|
||||
yae
|
||||
YAMLTo
|
||||
Yda
|
||||
yeet
|
||||
yeetfile
|
||||
yourdomain
|
||||
yyz
|
||||
Zenos
|
||||
zizmor
|
||||
zombocom
|
||||
zos
|
||||
463
.github/actions/spelling/line_forbidden.patterns
vendored
463
.github/actions/spelling/line_forbidden.patterns
vendored
@@ -1,463 +0,0 @@
|
||||
# reject `m_data` as VxWorks defined it and that breaks things if it's used elsewhere
|
||||
# see [fprime](https://github.com/nasa/fprime/commit/d589f0a25c59ea9a800d851ea84c2f5df02fb529)
|
||||
# and [Qt](https://github.com/qtproject/qt-solutions/blame/fb7bc42bfcc578ff3fa3b9ca21a41e96eb37c1c7/qtscriptclassic/src/qscriptbuffer_p.h#L46)
|
||||
#\bm_data\b
|
||||
|
||||
# Were you debugging using a framework with `fit()`?
|
||||
# If you have a framework that uses `it()` for testing and `fit()` for debugging a specific test,
|
||||
# you might not want to check in code where you skip all the other tests.
|
||||
#\bfit\(
|
||||
|
||||
# English does not use a hyphen between adverbs and nouns
|
||||
# https://twitter.com/nyttypos/status/1894815686192685239
|
||||
(?:^|\s)[A-Z]?[a-z]+ly-(?=[a-z]{3,})(?:[.,?!]?\s|$)
|
||||
|
||||
# Don't use `requires that` + `to be`
|
||||
# https://twitter.com/nyttypos/status/1894816551435641027
|
||||
\brequires that \w+\b[^.]+to be\b
|
||||
|
||||
# A fully parenthetical sentence’s period goes inside the parentheses, not outside.
|
||||
# https://twitter.com/nyttypos/status/1898844061873639490
|
||||
#\([A-Z][a-z]{2,}(?: [a-z]+){3,}\)\.\s
|
||||
|
||||
# Complete sentences in parentheticals should not have a space before the period.
|
||||
\s\.\)(?!.*\}\})
|
||||
|
||||
# Should be `HH:MM:SS`
|
||||
\bHH:SS:MM\b
|
||||
|
||||
# Should be `86400` (seconds in a standard day)
|
||||
\b84600\b(?:.*\bday\b)
|
||||
|
||||
# Should probably be `2006-01-02` (yyyy-mm-dd)
|
||||
# Assuming that the time is being passed to https://go.dev/src/time/format.go
|
||||
\b2006-02-01\b
|
||||
|
||||
# Should probably be `YYYYMMDD`
|
||||
\b[Yy]{4}[Dd]{2}[Mm]{2}(?!.*[Yy]{4}[Dd]{2}[Mm]{2}).*$
|
||||
|
||||
# Should be `a priori` or `and prior`
|
||||
(?i)(?<!posteriori)\sand priori\s
|
||||
|
||||
# Should be `a`
|
||||
\san (?=(?:[b-df-gj-np-rtv-xz]|h(?!our|tml|ttp)|s(?!sh|vg))[a-z])
|
||||
|
||||
# Should only be one of `a`, `an`, or `the`
|
||||
\b(?:(?:an?|the)\s+){2,}\b
|
||||
|
||||
# Should only be `are` or `can`, not both
|
||||
\b(?:(?:are|can)\s+){2,}\b
|
||||
|
||||
# Should probably be `ABCDEFGHIJKLMNOPQRSTUVWXYZ`
|
||||
(?i)(?!ABCDEFGHIJKLMNOPQRSTUVWXYZ)ABC[A-Z]{21}YZ
|
||||
|
||||
# Should be `anymore`
|
||||
\bany more[,.]
|
||||
|
||||
# Should be `Ask`
|
||||
(?:^|[.?]\s+)As\s+[A-Z][a-z]{2,}\s[^.?]*?(?:how|if|wh\w+)\b
|
||||
|
||||
# Should be `at one fell swoop`
|
||||
# and only when talking about killing, not some other completion
|
||||
# Act 4 Scene 3, Macbeth
|
||||
# https://www.opensourceshakespeare.org/views/plays/play_view.php?WorkID=macbeth&Act=4&Scene=3&Scope=scene
|
||||
\bin one fell s[lw]?oop\b
|
||||
|
||||
# Should be `'`
|
||||
(?i)\b(?:(?:i|s?he|they|what|who|you)[`"]ll|(?:are|ca|did|do|does|ha[ds]|have|is|should|were|wo|would)n[`"]t|(?:s?he|let|that|there|what|where|who)[`"]s|(?:i|they|we|what|who|you)[`"]ve)\b
|
||||
|
||||
# Should be `background` / `intro text` / `introduction` / `prologue` unless it's a brand or relates to _subterfuge_
|
||||
(?i)\bpretext\b
|
||||
|
||||
# Should be `branches`
|
||||
# ... unless it's really about the meal that replaces breakfast and lunch.
|
||||
\b[Bb]runches\b
|
||||
|
||||
# Should be `briefcase`
|
||||
\bbrief-case\b
|
||||
|
||||
# Should be `by far` or `far and away`
|
||||
\bby far and away\b
|
||||
|
||||
# Should be `can, not only ..., ... also...`
|
||||
\bcan not only.*can also\b
|
||||
|
||||
# Should be `cannot` (or `can't`)
|
||||
# See https://www.grammarly.com/blog/cannot-or-can-not/
|
||||
# > Don't use `can not` when you mean `cannot`. The only time you're likely to see `can not` written as separate words is when the word `can` happens to precede some other phrase that happens to start with `not`.
|
||||
# > `Can't` is a contraction of `cannot`, and it's best suited for informal writing.
|
||||
# > In formal writing and where contractions are frowned upon, use `cannot`.
|
||||
# > It is possible to write `can not`, but you generally find it only as part of some other construction, such as `not only . . . but also.`
|
||||
# - if you encounter such a case, add a pattern for that case to patterns.txt.
|
||||
\b[Cc]an not\b(?! only\b)
|
||||
|
||||
# Should be `chart`
|
||||
(?i)\bhelm\b.*\bchard\b
|
||||
|
||||
# Do not use `(click) here` links
|
||||
# For more information, see:
|
||||
# * https://www.w3.org/QA/Tips/noClickHere
|
||||
# * https://webaim.org/techniques/hypertext/link_text
|
||||
# * https://granicus.com/blog/why-click-here-links-are-bad/
|
||||
# * https://heyoka.medium.com/dont-use-click-here-f32f445d1021
|
||||
(?i)(?:>|\[)(?:(?:click |)here|link|(?:read |)more)(?:</|\]\()
|
||||
|
||||
# Including "image of" or "picture of" in alt text is unnecessary.
|
||||
\balt=['"](?:an? |)(?:image|picture) of
|
||||
|
||||
# Alt text should be short
|
||||
\balt=(?:'[^']{126,}'|"[^"]{126,}")
|
||||
|
||||
# Should be `equals` to `is equal to`
|
||||
\bequals to\b
|
||||
|
||||
# Should be `ECMA` 262 (JavaScript)
|
||||
(?i)\bTS\/EMCA\b|\bEMCA(?: \d|\s*Script)|\bEMCA\b(?=.*\bTS\b)
|
||||
|
||||
# Should be `ECMA` 340 (Near Field Communications)
|
||||
(?i)EMCA[- ]340
|
||||
|
||||
# Should be `fall back`
|
||||
\bfallback(?= to)\b
|
||||
|
||||
# Should be `GitHub`
|
||||
(?<![&*.]|// |\b(?:from|import|type) )\bGithub\b(?![{()])
|
||||
|
||||
# Should be `GitLab`
|
||||
(?<![&*.]|// |\b(?:from|import|type) )\bGitlab\b(?![{()])
|
||||
|
||||
# Should probably be `https://`...
|
||||
# Markdown generally doesn't assume that links are to urls
|
||||
\]\(www\.\w
|
||||
|
||||
# Should be `JavaScript`
|
||||
\bJavascript\b
|
||||
|
||||
# Should be `macOS` or `Mac OS X` or ...
|
||||
\bMacOS\b
|
||||
|
||||
# Should be `Microsoft`
|
||||
\bMicroSoft\b
|
||||
|
||||
# Should be `OAuth`
|
||||
(?:^|[^-/*$])[ '"]oAuth(?: [a-z]|\d+ |[^ a-zA-Z0-9:;_.()])
|
||||
|
||||
# Should be `RabbitMQ`
|
||||
\bRabbitmq\b
|
||||
|
||||
# Should be `TensorFlow`
|
||||
\bTensorflow\b
|
||||
|
||||
# Should be `TypeScript`
|
||||
\bTypescript\b
|
||||
|
||||
# Should be `another`
|
||||
\ban[- ]other(?!-)\b
|
||||
|
||||
# Should be `case-(in)sensitive`
|
||||
\bcase (?:in|)sensitive\b
|
||||
|
||||
# Should be `coinciding`
|
||||
\bco-inciding\b
|
||||
|
||||
# Should be `deprecation warning(s)`
|
||||
\b[Dd]epreciation [Ww]arnings?\b
|
||||
|
||||
# Should be `greater than`
|
||||
\bgreater then\b
|
||||
|
||||
# Should be `has`
|
||||
\b[Ii]t only have\b
|
||||
|
||||
# Should be `here-in`, `the`, `them`, `this`, `these` or reworded in some other way
|
||||
\bthe here(?:\.|,| (?!and|defined))
|
||||
|
||||
# Should be `greater than`
|
||||
\bhigher than\b
|
||||
|
||||
# Should be `ID` (unless it's a flag/property)
|
||||
(?<![-\.])\bId\b(?![(])
|
||||
|
||||
# Should be `in front of`
|
||||
\bin from of\b
|
||||
|
||||
# Should be `into`
|
||||
# when not phrasal and when `in order to` would be wrong:
|
||||
# https://thewritepractice.com/into-vs-in-to/
|
||||
\sin to\s(?!if\b)
|
||||
|
||||
# Should be `use`
|
||||
\sin used by\b
|
||||
|
||||
# Should be `in-depth` if used as an adjective (but `in depth` when used as an adverb)
|
||||
\bin depth\s(?!rather\b)\w{6,}
|
||||
|
||||
# Should be `in-flight` or `on the fly` (unless actually talking about airline flights)
|
||||
\bon[- ]flight\b(?!=\s+(?:(?:\w{2}|)\d+|availability|booking|computer|data|delay|departure|management|performance|radar|reservation|scheduling|software|status|ticket|time|type|.*(?:hotel|taxi)))
|
||||
|
||||
# Should be `is obsolete`
|
||||
\bis obsolescent\b
|
||||
|
||||
# Should be `it's` or `its`
|
||||
\bits['’]
|
||||
|
||||
# Should be `its`
|
||||
\bit's(?= own\b)
|
||||
|
||||
# Should be `its`
|
||||
\bit's(?= only purpose\b)
|
||||
|
||||
# Should be `for its` (possessive) or `because it is`
|
||||
\bfor it(?:'s| is)\b
|
||||
|
||||
# Should be `log in`
|
||||
\blogin to the
|
||||
|
||||
# Should be `long-standing`
|
||||
\blong standing\b
|
||||
|
||||
# `apt-key` is deprecated
|
||||
# ... instead you should be writing a pair of files:
|
||||
# ... * the gpg key added to a distinct key ring file based on your project/distro/key...
|
||||
# ... * the sources.list in a district file -- not simply appended to `/etc/apt/sources.list` -- (there is a newer format [DEB822](https://manpages.debian.org/bookworm/dpkg-dev/deb822.5.en.html)) that references the gpg key.
|
||||
# Consider:
|
||||
# ````sh
|
||||
# curl http://download.something.example.com/$DISTRO/Release.key | \
|
||||
# gpg --dearmor --yes --output /usr/share/keyrings/something-distro.gpg
|
||||
# echo "deb [signed-by=/usr/share/keyrings/something-distro.gpg] http://download.something.example.com/repositories/home:/$DISTRO ./" \
|
||||
# >> /etc/apt/sources.list.d/something-distro.list
|
||||
# ````
|
||||
\bapt-key add\b
|
||||
|
||||
# Should be `nearby`
|
||||
\bnear by\b
|
||||
|
||||
# Should probably be a person named `Nick` or the abbreviation `NIC`
|
||||
\bNic\b
|
||||
|
||||
# Should be `not supposed`
|
||||
\bsupposed not\b
|
||||
|
||||
# Should probably be `much more`
|
||||
\bmore much\b
|
||||
|
||||
# Should be `perform its`
|
||||
\bperform it's\b
|
||||
|
||||
# Should be `opt-in`
|
||||
(?<!\scan|for)(?<!\smust)(?<!\sif)\sopt in\s
|
||||
|
||||
# Should be `less than`
|
||||
\bless then\b
|
||||
|
||||
# Should be `load balancer`
|
||||
\b[Ll]oud balancer
|
||||
|
||||
# Should be `moot`
|
||||
\bmute point\b
|
||||
|
||||
# Should be `one of`
|
||||
(?<!-)\bon of\b
|
||||
|
||||
# Should be `on the other hand`
|
||||
\b(?i)on another hand\b
|
||||
|
||||
# Reword to `on at runtime` or `enabled at launch`
|
||||
# The former if you mean it can be changed dynamically.
|
||||
# The latter if you mean that it can be changed without recompiling but not after the program starts.
|
||||
\bswitched on runtime\b
|
||||
|
||||
# Should be `Of course,`
|
||||
[?.!]\s+Of course\s(?=[-\w\s]+[.?;!,])
|
||||
|
||||
# Most people only have two hands. Reword.
|
||||
\b(?i)on the third hand\b
|
||||
|
||||
# Should be `OpenShift`
|
||||
\bOpenshift\b
|
||||
|
||||
# Should be `otherwise`
|
||||
\bother[- ]wise\b
|
||||
|
||||
# Should be `; otherwise` or `. Otherwise`
|
||||
# https://study.com/learn/lesson/otherwise-in-a-sentence.html
|
||||
, [Oo]therwise\b
|
||||
|
||||
# Should probably be `Otherwise,`
|
||||
(?<=\. )Otherwise\s
|
||||
|
||||
# Should be `or (more|less)`
|
||||
\bore (?:more|less)\b
|
||||
|
||||
# Should be `rather than`
|
||||
\brather then\b
|
||||
|
||||
# Should be `Red Hat`
|
||||
\bRed[Hh]at\b
|
||||
|
||||
# Should be `regardless, ...` or `regardless of (whether)`
|
||||
\b[Rr]egardless if you\b
|
||||
|
||||
# Should be `self-signed`
|
||||
\bself signed\b
|
||||
|
||||
# Should be `SendGrid`
|
||||
\bSendgrid\b
|
||||
|
||||
# Should be `set up` (`setup` is a noun / `set up` is a verb)
|
||||
\b[Ss]etup(?= (?:an?|the)\b)
|
||||
|
||||
# Should be `state`
|
||||
\bsate(?=\b|[A-Z])|(?<=[a-z])Sate(?=\b|[A-Z])|(?<=[A-Z]{2})Sate(?=\b|[A-Z])
|
||||
|
||||
# Should be `no longer needed`
|
||||
\bno more needed\b(?! than\b)
|
||||
|
||||
# Should be `<see|look> below for the`
|
||||
(?i)\bfind below the\b
|
||||
|
||||
# Should be `then any` unless there's a comparison before the `,`
|
||||
, than any\b
|
||||
|
||||
# Should be `did not exist`
|
||||
\bwere not existent\b
|
||||
|
||||
# Should be `nonexistent`
|
||||
\bnon existing\b
|
||||
|
||||
# Should be `nonexistent`
|
||||
\b[Nn]o[nt][- ]existent\b
|
||||
|
||||
# Should be `our`
|
||||
\bspending out time\b
|
||||
|
||||
# Should be `@brief` / `@details` / `@param` / `@return` / `@retval`
|
||||
(?:^\s*|(?:\*|//|/*)\s+`)[\\@](?:breif|(?:detail|detials)|(?:params(?!\.)|prama?)|ret(?:uns?)|retvl)\b
|
||||
|
||||
# Should be `more than` or `more, then`
|
||||
\bmore then\b
|
||||
|
||||
# Should be `Pipeline`/`pipeline`
|
||||
(?:(?<=\b|[A-Z])p|P)ipeLine(?:\b|(?=[A-Z]))
|
||||
|
||||
# Should be `preexisting`
|
||||
[Pp]re[- ]existing
|
||||
|
||||
# Should be `preempt`
|
||||
[Pp]re[- ]empt\b
|
||||
|
||||
# Should be `preemptively`
|
||||
[Pp]re[- ]emptively
|
||||
|
||||
# Should be `prepopulate`
|
||||
[Pp]re[- ]populate
|
||||
|
||||
# Should be `prerequisite`
|
||||
[Pp]re[- ]requisite
|
||||
|
||||
# Should be `recently changed` or `recent changes`
|
||||
[Rr]ecent changed
|
||||
|
||||
# Should be `reentrancy`
|
||||
[Rr]e[- ]entrancy
|
||||
|
||||
# Should be `reentrant`
|
||||
[Rr]e[- ]entrant
|
||||
|
||||
# Should be `room for`
|
||||
\brooms for (?!lease|rent|sale)
|
||||
|
||||
# Should be `socioeconomic`
|
||||
# https://dictionary.cambridge.org/us/dictionary/english/socioeconomic
|
||||
socio-economic
|
||||
|
||||
# Should be `strong suit`
|
||||
\b(?:my|his|her|their) strong suite\b
|
||||
|
||||
# Should probably be `temperatures` unless actually talking about thermal drafts (things birds may fly on)
|
||||
\bthermals\b
|
||||
|
||||
# Should be `there are` or `they are` (or `they're`)
|
||||
(?i)\btheir are\b
|
||||
|
||||
# Should be `understand`
|
||||
\bunder stand\b
|
||||
|
||||
# Should be `URI` or `uri` unless it refers to a person named `Uri` (or a flag)
|
||||
(?<![-\.])\bUri\b(?![(])
|
||||
|
||||
# Should be `it uses is`
|
||||
/\bis uses is\b/
|
||||
|
||||
# Should be `uses it as`
|
||||
(?:^|\. |and )uses is as (?!an?\b|follows|livestock|[^.]+\s+as\b)
|
||||
|
||||
# Should be `was`
|
||||
\bhas been(?= removed in v?\d)
|
||||
|
||||
# Should be `where`
|
||||
\bwere they are\b
|
||||
|
||||
# Should be `why`
|
||||
, way(?= is [^.]*\?)
|
||||
|
||||
# should be `vCenter`
|
||||
\bV[Cc]enter\b
|
||||
|
||||
# Should be `VM`
|
||||
\bVm\b
|
||||
|
||||
# Should be `walkthrough(s)`
|
||||
\bwalk-throughs?\b
|
||||
|
||||
# Should be `we'll`
|
||||
\bwe 'll\b
|
||||
|
||||
# Should be `whereas`
|
||||
\bwhere as\b
|
||||
|
||||
# Should be `WinGet`
|
||||
\bWinget\b
|
||||
|
||||
# Should be `without` (unless `out` is a modifier of the next word)
|
||||
\bwith out\b(?!-)
|
||||
|
||||
# Should be `work around`
|
||||
\b[Ww]orkaround(?= an?\b)
|
||||
|
||||
# Should be `workarounds`
|
||||
\bwork[- ]arounds\b
|
||||
|
||||
# Should be `workaround`
|
||||
(?:(?:[Aa]|[Tt]he|ugly)\swork[- ]around\b|\swork[- ]around\s+for)
|
||||
|
||||
# Should be `worst`
|
||||
(?i)worse-case
|
||||
|
||||
# Should be `you are not` or reworded
|
||||
\byour not\b
|
||||
|
||||
# Should be `(coarse|fine)-grained`
|
||||
\b(?:coarse|fine) grained\b
|
||||
|
||||
# Homoglyph (Cyrillic) should be `A`/`B`/`C`/`E`/`H`/`I`/`I`/`J`/`K`/`M`/`O`/`P`/`S`/`T`/`Y`
|
||||
# It's possible that your content is intentionally mixing Cyrillic and Latin scripts, but if it isn't, you definitely want to correct this.
|
||||
(?<=[A-Z]{2})[АВСЕНІӀЈКМОРЅТУ]|[АВСЕНІӀЈКМОРЅТУ](?=[A-Z]+(?:\b|[a-z]+)|[a-z]+(?:[^a-z]|$))
|
||||
|
||||
# Homoglyph (Cyrillic) should be `a`/`b`/`c`/`e`/`o`/`p`/`x`/`y`
|
||||
# It's possible that your content is intentionally mixing Cyrillic and Latin scripts, but if it isn't, you definitely want to correct this.
|
||||
[авсеорху](?=[A-Za-z]{2,})|(?<=[A-Za-z]{2})[авсеорху]|(?<=[A-Za-z])[авсеорху](?=[A-Za-z])
|
||||
|
||||
# Should be `neither/nor` -- or reword
|
||||
(?!<do )\bnot\b([^.?!"/(](?!neither|,.*?,))+\bnor\b
|
||||
|
||||
# Should be `neither/nor` (plus rewording the beginning)
|
||||
# This is probably a double negative...
|
||||
\bnot\b[^.?!"/(]*\bneither\b[^.?!"/(]*\bnor\b
|
||||
|
||||
# In English, duplicated words are generally mistakes
|
||||
# There are a few exceptions (e.g. "that that").
|
||||
# If the highlighted doubled word pair is in:
|
||||
# * code, write a pattern to mask it.
|
||||
# * prose, have someone read the English before you dismiss this error.
|
||||
\s([A-Z]{3,}|[A-Z][a-z]{2,}|[a-z]{3,})\s\g{-1}\s
|
||||
138
.github/actions/spelling/patterns.txt
vendored
138
.github/actions/spelling/patterns.txt
vendored
@@ -1,138 +0,0 @@
|
||||
# See https://github.com/check-spelling/check-spelling/wiki/Configuration-Examples:-patterns
|
||||
|
||||
# Automatically suggested patterns
|
||||
|
||||
# hit-count: 198 file-count: 52
|
||||
# https/http/file urls
|
||||
(?:\b(?:https?|ftp|file)://)[-A-Za-z0-9+&@#/*%?=~_|!:,.;]+[-A-Za-z0-9+&@#/*%=~_|]
|
||||
|
||||
# hit-count: 22 file-count: 8
|
||||
# GitHub actions
|
||||
\buses:\s+[-\w.]+/[-\w./]+@[-\w.]+
|
||||
|
||||
# hit-count: 19 file-count: 5
|
||||
# libraries
|
||||
(?:\b|_)[Ll]ib(?:re(?=office)|era(?![lt])|)(?!ero|erty|rar(?:i(?:an|es)|y))(?=[a-z])
|
||||
|
||||
# hit-count: 17 file-count: 8
|
||||
# version suffix <word>v#
|
||||
(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))
|
||||
|
||||
# hit-count: 15 file-count: 7
|
||||
# container images
|
||||
image: [-\w./:@]+
|
||||
|
||||
# hit-count: 14 file-count: 9
|
||||
# imports
|
||||
^import\s+(?:(?:static|type)\s+|)(?:[\w.]|\{\s*\w*?(?:,\s*(?:\w*|\*))+\s*\})+
|
||||
|
||||
# hit-count: 11 file-count: 2
|
||||
# hex digits including css/html color classes:
|
||||
(?:[\\0][xX]|\\u|[uU]\+|#x?|%23|&H)[0-9_a-fA-FgGrR]*?[a-fA-FgGrR]{2,}[0-9_a-fA-FgGrR]*(?:[uUlL]{0,3}|[iu]\d+)\b
|
||||
|
||||
# hit-count: 8 file-count: 5
|
||||
# node packages
|
||||
(["'])@[^/'" ]+/[^/'" ]+\g{-1}
|
||||
|
||||
# hit-count: 5 file-count: 2
|
||||
# css fonts
|
||||
\bfont(?:-family|):[^;}]+
|
||||
|
||||
# hit-count: 4 file-count: 4
|
||||
# set arguments
|
||||
\b(?:bash|sh|set)(?:\s+[-+][abefimouxE]{1,2})*\s+[-+][abefimouxE]{3,}(?:\s+[-+][abefimouxE]+)*
|
||||
|
||||
# hit-count: 4 file-count: 2
|
||||
# css url wrappings
|
||||
\burl\([^)]+\)
|
||||
|
||||
# hit-count: 2 file-count: 2
|
||||
# C network byte conversions
|
||||
(?:\d|\bh)to(?!ken)(?=[a-z])|to(?=[adhiklpun]\()
|
||||
|
||||
# hit-count: 2 file-count: 1
|
||||
# GitHub SHA refs
|
||||
\[([0-9a-f]+)\]\(https://(?:www\.|)github.com/[-\w]+/[-\w]+/commit/\g{-1}[0-9a-f]*
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# copyright
|
||||
Copyright (?:\([Cc]\)|)(?:[-\d, ]|and)+(?: [A-Z][a-z]+ [A-Z][a-z]+,?)+
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# IPv6
|
||||
\b(?:[0-9a-fA-F]{0,4}:){3,7}[0-9a-fA-F]{0,4}\b
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# Docker images
|
||||
^\s*(?i)FROM\s+\S+:\S+(?:\s+AS\s+\S+|)
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# perl run
|
||||
perl(?:\s+-[a-zA-Z]\w*)+
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# go install
|
||||
go install(?:\s+[a-z]+\.[-@\w/.]+)+
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# in check-spelling@v0.0.22+, printf markers aren't automatically consumed
|
||||
# printf markers
|
||||
(?<!\\)\\[nrt](?=[a-z]{2,})
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# tar arguments
|
||||
\b(?:\\n|)g?tar(?:\.exe|)(?:(?:\s+--[-a-zA-Z]+|\s+-[a-zA-Z]+|\s[ABGJMOPRSUWZacdfh-pr-xz]+\b)(?:=[^ ]*|))+
|
||||
|
||||
# Questionably acceptable forms of `in to`
|
||||
# Personally, I prefer `log into`, but people object
|
||||
# https://www.tprteaching.com/log-into-log-in-to-login/
|
||||
\b(?:(?:[Ll]og(?:g(?=[a-z])|)|[Ss]ign)(?:ed|ing)?) in to\b
|
||||
|
||||
# to opt in
|
||||
\bto opt in\b
|
||||
|
||||
# pass(ed|ing) in
|
||||
\bpass(?:ed|ing) in\b
|
||||
|
||||
# acceptable duplicates
|
||||
# ls directory listings
|
||||
[-bcdlpsw](?:[-r][-w][-SsTtx]){3}[\.+*]?\s+\d+\s+\S+\s+\S+\s+[.\d]+(?:[KMGT]|)\s+
|
||||
# mount
|
||||
\bmount\s+-t\s+(\w+)\s+\g{-1}\b
|
||||
# C types and repeated CSS values
|
||||
\s(auto|buffalo|center|div|inherit|long|LONG|none|normal|solid|thin|transparent|very)(?: \g{-1})+\s
|
||||
# C enum and struct
|
||||
\b(?:enum|struct)\s+(\w+)\s+\g{-1}\b
|
||||
# go templates
|
||||
\s(\w+)\s+\g{-1}\s+\`(?:graphql|inject|json|yaml):
|
||||
# doxygen / javadoc / .net
|
||||
(?:[\\@](?:brief|defgroup|groupname|link|t?param|return|retval)|(?:public|private|\[Parameter(?:\(.+\)|)\])(?:\s+(?:static|override|readonly|required|virtual))*)(?:\s+\{\w+\}|)\s+(\w+)\s+\g{-1}\s
|
||||
|
||||
# macOS file path
|
||||
(?:Contents\W+|(?!iOS)/)MacOS\b
|
||||
|
||||
# Python package registry has incorrect spelling for macOS / Mac OS X
|
||||
"Operating System :: MacOS :: MacOS X"
|
||||
|
||||
# "company" in Germany
|
||||
\bGmbH\b
|
||||
|
||||
# IntelliJ
|
||||
\bIntelliJ\b
|
||||
|
||||
# Commit message -- Signed-off-by and friends
|
||||
^\s*(?:(?:Based-on-patch|Co-authored|Helped|Mentored|Reported|Reviewed|Signed-off)-by|Thanks-to): (?:[^<]*<[^>]*>|[^<]*)\s*$
|
||||
|
||||
# Autogenerated revert commit message
|
||||
^This reverts commit [0-9a-f]{40}\.$
|
||||
|
||||
# ignore long runs of a single character:
|
||||
\b([A-Za-z])\g{-1}{3,}\b
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# microsoft
|
||||
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
|
||||
|
||||
# hit-count: 1 file-count: 1
|
||||
# data url
|
||||
\bdata:[-a-zA-Z=;:/0-9+]*,\S*
|
||||
23
.github/actions/spelling/reject.txt
vendored
23
.github/actions/spelling/reject.txt
vendored
@@ -1,23 +0,0 @@
|
||||
^attache$
|
||||
^bellows?$
|
||||
benefitting
|
||||
occurences?
|
||||
^dependan.*
|
||||
^develope$
|
||||
^developement$
|
||||
^developpe
|
||||
^Devers?$
|
||||
^devex
|
||||
^devide
|
||||
^Devinn?[ae]
|
||||
^devisal
|
||||
^devisor
|
||||
^diables?$
|
||||
^oer$
|
||||
Sorce
|
||||
^[Ss]pae.*
|
||||
^Teh$
|
||||
^untill$
|
||||
^untilling$
|
||||
^venders?$
|
||||
^wether.*
|
||||
28
.github/dependabot.yml
vendored
28
.github/dependabot.yml
vendored
@@ -1,28 +0,0 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
groups:
|
||||
github-actions:
|
||||
patterns:
|
||||
- "*"
|
||||
|
||||
- package-ecosystem: gomod
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
groups:
|
||||
gomod:
|
||||
patterns:
|
||||
- "*"
|
||||
|
||||
- package-ecosystem: npm
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
groups:
|
||||
npm:
|
||||
patterns:
|
||||
- "*"
|
||||
73
.github/workflows/asset-verification.yml
vendored
73
.github/workflows/asset-verification.yml
vendored
@@ -1,73 +0,0 @@
|
||||
name: Asset Build Verification
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
asset_verification:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: latest
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
|
||||
- name: Check for uncommitted changes before asset build
|
||||
id: check-changes-before
|
||||
run: |
|
||||
if [[ -n $(git status --porcelain) ]]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Fail if there are uncommitted changes before build
|
||||
if: steps.check-changes-before.outputs.has_changes == 'true'
|
||||
run: |
|
||||
echo "There are uncommitted changes before running npm run assets"
|
||||
git status
|
||||
exit 1
|
||||
|
||||
- name: Run asset build
|
||||
run: |
|
||||
npm run assets
|
||||
|
||||
- name: Check for uncommitted changes after asset build
|
||||
id: check-changes-after
|
||||
run: |
|
||||
if [[ -n $(git status --porcelain) ]]; then
|
||||
echo "has_changes=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "has_changes=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Fail if assets generated changes
|
||||
if: steps.check-changes-after.outputs.has_changes == 'true'
|
||||
run: |
|
||||
echo "npm run assets generated uncommitted changes. This indicates the repository has outdated generated files."
|
||||
echo "Please run 'npm run assets' locally and commit the changes."
|
||||
git status
|
||||
git diff
|
||||
exit 1
|
||||
35
.github/workflows/docker-pr.yml
vendored
35
.github/workflows/docker-pr.yml
vendored
@@ -2,7 +2,7 @@ name: Docker image builds (pull requests)
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
branches: [ "main" ]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -12,48 +12,33 @@ permissions:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-24.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
node-version: latest
|
||||
go-version: '1.24.x'
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
|
||||
- uses: ko-build/setup-ko@v0.8
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5.9.0
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository }}
|
||||
images: ghcr.io/techarohq/anubis
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
run: |
|
||||
npm ci
|
||||
npm run container
|
||||
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
|
||||
env:
|
||||
PULL_REQUEST_ID: ${{ github.event.number }}
|
||||
DOCKER_REPO: ghcr.io/${{ github.repository }}
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
- run: |
|
||||
echo "Test this with:"
|
||||
echo "docker pull ${DOCKER_IMAGE}"
|
||||
env:
|
||||
DOCKER_IMAGE: ${{ steps.build.outputs.docker_image }}
|
||||
echo "docker pull ${{ steps.build.outputs.docker_image }}"
|
||||
53
.github/workflows/docker.yml
vendored
53
.github/workflows/docker.yml
vendored
@@ -3,8 +3,8 @@ name: Docker image builds
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: ["main"]
|
||||
tags: ["v*"]
|
||||
branches: [ "main" ]
|
||||
tags: [ "v*" ]
|
||||
|
||||
env:
|
||||
DOCKER_METADATA_SET_OUTPUT_ENV: "true"
|
||||
@@ -18,59 +18,42 @@ permissions:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-24.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- name: Set lowercase image name
|
||||
run: |
|
||||
echo "IMAGE=ghcr.io/${GITHUB_REPOSITORY,,}" >> $GITHUB_ENV
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
node-version: latest
|
||||
go-version: '1.24.x'
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
- uses: ko-build/setup-ko@v0.8
|
||||
|
||||
- uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
username: techarohq
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5.9.0
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.IMAGE }}
|
||||
images: ghcr.io/techarohq/anubis
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
run: |
|
||||
npm ci
|
||||
npm run container
|
||||
env:
|
||||
DOCKER_REPO: ${{ env.IMAGE }}
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
go run ./cmd/containerbuild --docker-repo ghcr.io/techarohq/anubis --slog-level debug
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
|
||||
uses: actions/attest-build-provenance@v2
|
||||
if: ${{github.event_name == 'pull_request'}}
|
||||
with:
|
||||
subject-name: ${{ env.IMAGE }}
|
||||
subject-name: ghcr.io/techarohq/anubis
|
||||
subject-digest: ${{ steps.build.outputs.digest }}
|
||||
push-to-registry: true
|
||||
push-to-registry: true
|
||||
34
.github/workflows/docs-deploy.yml
vendored
34
.github/workflows/docs-deploy.yml
vendored
@@ -3,7 +3,7 @@ name: Docs deploy
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: ["main"]
|
||||
branches: [ "main" ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -13,19 +13,16 @@ permissions:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-24.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: techarohq
|
||||
@@ -33,16 +30,13 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5.9.0
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
|
||||
main
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ./docs
|
||||
cache-to: type=gha
|
||||
@@ -52,16 +46,16 @@ jobs:
|
||||
platforms: linux/amd64
|
||||
push: true
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@f14933a23bc8c582b5aa7d108defd8e2cb9fa86d # v1.34.1
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@master
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
KUBE_CONFIG: ${{ secrets.AEACUS_KUBECONFIG }}
|
||||
with:
|
||||
args: apply -k docs/manifest
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@f14933a23bc8c582b5aa7d108defd8e2cb9fa86d # v1.34.1
|
||||
- name: Apply k8s manifests to aeacus
|
||||
uses: actions-hub/kubectl@master
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
KUBE_CONFIG: ${{ secrets.AEACUS_KUBECONFIG }}
|
||||
with:
|
||||
args: rollout restart -n default deploy/anubis-docs
|
||||
|
||||
42
.github/workflows/docs-test.yml
vendored
42
.github/workflows/docs-test.yml
vendored
@@ -1,42 +0,0 @@
|
||||
name: Docs test build
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5.9.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
|
||||
main
|
||||
|
||||
- name: Build and push
|
||||
id: build
|
||||
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
|
||||
with:
|
||||
context: ./docs
|
||||
cache-to: type=gha
|
||||
cache-from: type=gha
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
platforms: linux/amd64
|
||||
push: false
|
||||
88
.github/workflows/go.yml
vendored
88
.github/workflows/go.yml
vendored
@@ -2,64 +2,62 @@ name: Go
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
branches: [ "main" ]
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
branches: [ "main" ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
go_tests:
|
||||
build:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: latest
|
||||
- name: Set up Homebrew
|
||||
uses: Homebrew/actions/setup-homebrew@master
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
/home/linuxbrew/.linuxbrew/bin
|
||||
/home/linuxbrew/.linuxbrew/etc
|
||||
/home/linuxbrew/.linuxbrew/include
|
||||
/home/linuxbrew/.linuxbrew/lib
|
||||
/home/linuxbrew/.linuxbrew/opt
|
||||
/home/linuxbrew/.linuxbrew/sbin
|
||||
/home/linuxbrew/.linuxbrew/share
|
||||
/home/linuxbrew/.linuxbrew/var
|
||||
key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-homebrew-cellar-
|
||||
|
||||
- name: Cache playwright binaries
|
||||
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
|
||||
id: playwright-cache
|
||||
with:
|
||||
path: |
|
||||
~/.cache/ms-playwright
|
||||
key: ${{ runner.os }}-playwright-${{ hashFiles('**/go.sum') }}
|
||||
- name: Install Brew dependencies
|
||||
run: |
|
||||
brew bundle
|
||||
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
~/go/pkg/mod
|
||||
key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-golang-
|
||||
|
||||
- name: install playwright browsers
|
||||
run: |
|
||||
npx --no-install playwright@1.52.0 install --with-deps
|
||||
npx --no-install playwright@1.52.0 run-server --port 9001 &
|
||||
- name: Build
|
||||
run: go build ./...
|
||||
|
||||
- name: Build
|
||||
run: npm run build
|
||||
|
||||
- name: Test
|
||||
run: npm run test
|
||||
|
||||
- name: Lint with staticcheck
|
||||
uses: dominikh/staticcheck-action@024238d2898c874f26d723e7d0ff4308c35589a2 # v1.4.0
|
||||
with:
|
||||
version: "latest"
|
||||
|
||||
- name: Govulncheck
|
||||
run: |
|
||||
go tool govulncheck ./...
|
||||
- name: Test
|
||||
run: go test ./...
|
||||
|
||||
54
.github/workflows/package-builds-stable.yml
vendored
54
.github/workflows/package-builds-stable.yml
vendored
@@ -1,54 +0,0 @@
|
||||
name: Package builds (stable)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# release:
|
||||
# types: [published]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
package_builds:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: latest
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
|
||||
- name: Build Packages
|
||||
run: |
|
||||
go tool yeet
|
||||
|
||||
- name: Upload released artifacts
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.TOKEN }}
|
||||
RELEASE_VERSION: ${{github.event.release.tag_name}}
|
||||
shell: bash
|
||||
run: |
|
||||
RELEASE="${RELEASE_VERSION}"
|
||||
cd var
|
||||
for file in *; do
|
||||
gh release upload $RELEASE $file
|
||||
done
|
||||
48
.github/workflows/package-builds-unstable.yml
vendored
48
.github/workflows/package-builds-unstable.yml
vendored
@@ -1,48 +0,0 @@
|
||||
name: Package builds (unstable)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
package_builds:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: build essential
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: latest
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- name: install node deps
|
||||
run: |
|
||||
npm ci
|
||||
|
||||
- name: Build Packages
|
||||
run: |
|
||||
go tool yeet
|
||||
|
||||
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
|
||||
with:
|
||||
name: packages
|
||||
path: var/*
|
||||
63
.github/workflows/smoke-tests.yml
vendored
63
.github/workflows/smoke-tests.yml
vendored
@@ -1,63 +0,0 @@
|
||||
name: Smoke tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
pull_request:
|
||||
branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
smoke-test:
|
||||
strategy:
|
||||
matrix:
|
||||
test:
|
||||
- default-config-macro
|
||||
- docker-registry
|
||||
- double_slash
|
||||
- forced-language
|
||||
- git-clone
|
||||
- git-push
|
||||
- healthcheck
|
||||
- i18n
|
||||
- palemoon/amd64
|
||||
#- palemoon/i386
|
||||
- robots_txt
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
|
||||
with:
|
||||
node-version: latest
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
|
||||
|
||||
- name: Install utils
|
||||
run: |
|
||||
go install ./utils/cmd/...
|
||||
|
||||
- name: Run test
|
||||
run: |
|
||||
cd test/${{ matrix.test }}
|
||||
backoff-retry --try-count 10 ./test.sh
|
||||
|
||||
- name: Sanitize artifact name
|
||||
if: always()
|
||||
run: echo "ARTIFACT_NAME=${{ matrix.test }}" | sed 's|/|-|g' >> $GITHUB_ENV
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4
|
||||
if: always()
|
||||
with:
|
||||
name: ${{ env.ARTIFACT_NAME }}
|
||||
path: test/${{ matrix.test }}/var
|
||||
118
.github/workflows/spelling.yml
vendored
118
.github/workflows/spelling.yml
vendored
@@ -1,118 +0,0 @@
|
||||
name: Check Spelling
|
||||
|
||||
# Comment management is handled through a secondary job, for details see:
|
||||
# https://github.com/check-spelling/check-spelling/wiki/Feature%3A-Restricted-Permissions
|
||||
#
|
||||
# `jobs.comment-push` runs when a push is made to a repository and the `jobs.spelling` job needs to make a comment
|
||||
# (in odd cases, it might actually run just to collapse a comment, but that's fairly rare)
|
||||
# it needs `contents: write` in order to add a comment.
|
||||
#
|
||||
# `jobs.comment-pr` runs when a pull_request is made to a repository and the `jobs.spelling` job needs to make a comment
|
||||
# or collapse a comment (in the case where it had previously made a comment and now no longer needs to show a comment)
|
||||
# it needs `pull-requests: write` in order to manipulate those comments.
|
||||
|
||||
# Updating pull request branches is managed via comment handling.
|
||||
# For details, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Update-expect-list
|
||||
#
|
||||
# These elements work together to make it happen:
|
||||
#
|
||||
# `on.issue_comment`
|
||||
# This event listens to comments by users asking to update the metadata.
|
||||
#
|
||||
# `jobs.update`
|
||||
# This job runs in response to an issue_comment and will push a new commit
|
||||
# to update the spelling metadata.
|
||||
#
|
||||
# `with.experimental_apply_changes_via_bot`
|
||||
# Tells the action to support and generate messages that enable it
|
||||
# to make a commit to update the spelling metadata.
|
||||
#
|
||||
# `with.ssh_key`
|
||||
# In order to trigger workflows when the commit is made, you can provide a
|
||||
# secret (typically, a write-enabled github deploy key).
|
||||
#
|
||||
# For background, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Update-with-deploy-key
|
||||
|
||||
# SARIF reporting
|
||||
#
|
||||
# Access to SARIF reports is generally restricted (by GitHub) to members of the repository.
|
||||
#
|
||||
# Requires enabling `security-events: write`
|
||||
# and configuring the action with `use_sarif: 1`
|
||||
#
|
||||
# For information on the feature, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-SARIF-output
|
||||
|
||||
# Minimal workflow structure:
|
||||
#
|
||||
# on:
|
||||
# push:
|
||||
# ...
|
||||
# pull_request_target:
|
||||
# ...
|
||||
# jobs:
|
||||
# # you only want the spelling job, all others should be omitted
|
||||
# spelling:
|
||||
# # remove `security-events: write` and `use_sarif: 1`
|
||||
# # remove `experimental_apply_changes_via_bot: 1`
|
||||
# ... otherwise adjust the `with:` as you wish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- '**'
|
||||
tags-ignore:
|
||||
- '**'
|
||||
pull_request:
|
||||
branches:
|
||||
- '**'
|
||||
types:
|
||||
- 'opened'
|
||||
- 'reopened'
|
||||
- 'synchronize'
|
||||
|
||||
jobs:
|
||||
spelling:
|
||||
name: Check Spelling
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
actions: read
|
||||
security-events: write
|
||||
outputs:
|
||||
followup: ${{ steps.spelling.outputs.followup }}
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ contains(github.event_name, 'pull_request') || github.event_name == 'push' }}
|
||||
concurrency:
|
||||
group: spelling-${{ github.event.pull_request.number || github.ref }}
|
||||
# note: If you use only_check_changed_files, you do not want cancel-in-progress
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: check-spelling
|
||||
id: spelling
|
||||
uses: check-spelling/check-spelling@c635c2f3f714eec2fcf27b643a1919b9a811ef2e # v0.0.25
|
||||
with:
|
||||
suppress_push_for_open_pull_request: ${{ github.actor != 'dependabot[bot]' && 1 }}
|
||||
checkout: true
|
||||
check_file_names: 1
|
||||
post_comment: 0
|
||||
use_magic_file: 1
|
||||
warnings: bad-regex,binary-file,deprecated-feature,ignored-expect-variant,large-file,limited-references,no-newline-at-eof,noisy-file,non-alpha-in-dictionary,token-is-substring,unexpected-line-ending,whitespace-in-dictionary,minified-file,unsupported-configuration,no-files-to-check,unclosed-block-ignore-begin,unclosed-block-ignore-end
|
||||
use_sarif: ${{ (!github.event.pull_request || (github.event.pull_request.head.repo.full_name == github.repository)) && 1 }}
|
||||
check_extra_dictionaries: ""
|
||||
dictionary_source_prefixes: >
|
||||
{
|
||||
"cspell": "https://raw.githubusercontent.com/check-spelling/cspell-dicts/v20241114/dictionaries/"
|
||||
}
|
||||
extra_dictionaries: |
|
||||
cspell:software-terms/softwareTerms.txt
|
||||
cspell:golang/go.txt
|
||||
cspell:npm/npm.txt
|
||||
cspell:k8s/k8s.txt
|
||||
cspell:python/python/python-lib.txt
|
||||
cspell:aws/aws.txt
|
||||
cspell:node/node.txt
|
||||
cspell:html/html.txt
|
||||
cspell:filetypes/filetypes.txt
|
||||
cspell:python/common/extra.txt
|
||||
cspell:docker/docker-words.txt
|
||||
cspell:fullstack/fullstack.txt
|
||||
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
37
.github/workflows/ssh-ci-runner-cron.yml
vendored
@@ -1,37 +0,0 @@
|
||||
name: Regenerate ssh ci runner image
|
||||
|
||||
on:
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
schedule:
|
||||
- cron: "0 0 1,8,15,22 * *"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: write
|
||||
packages: write
|
||||
|
||||
jobs:
|
||||
ssh-ci-rebuild:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
- name: Build and push
|
||||
run: |
|
||||
cd ./test/ssh-ci
|
||||
docker buildx bake --push
|
||||
45
.github/workflows/ssh-ci.yml
vendored
45
.github/workflows/ssh-ci.yml
vendored
@@ -1,45 +0,0 @@
|
||||
name: SSH CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main"]
|
||||
# pull_request:
|
||||
# branches: ["main"]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
ssh:
|
||||
if: github.repository == 'TecharoHQ/anubis'
|
||||
runs-on: alrest-techarohq
|
||||
strategy:
|
||||
matrix:
|
||||
host:
|
||||
- riscv64
|
||||
- ppc64le
|
||||
- aarch64-4k
|
||||
- aarch64-16k
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install CI target SSH key
|
||||
uses: shimataro/ssh-key-action@d4fffb50872869abe2d9a9098a6d9c5aa7d16be4 # v2.7.0
|
||||
with:
|
||||
key: ${{ secrets.CI_SSH_KEY }}
|
||||
name: id_rsa
|
||||
known_hosts: ${{ secrets.CI_SSH_KNOWN_HOSTS }}
|
||||
|
||||
- uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: stable
|
||||
|
||||
- name: Run CI
|
||||
run: go run ./utils/cmd/backoff-retry bash test/ssh-ci/rigging.sh ${{ matrix.host }}
|
||||
env:
|
||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||
35
.github/workflows/zizmor.yml
vendored
35
.github/workflows/zizmor.yml
vendored
@@ -1,35 +0,0 @@
|
||||
name: zizmor
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/*.ya?ml'
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/workflows/*.ya?ml'
|
||||
|
||||
jobs:
|
||||
zizmor:
|
||||
name: zizmor latest via PyPI
|
||||
runs-on: ubuntu-24.04
|
||||
permissions:
|
||||
security-events: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
|
||||
- name: Run zizmor 🌈
|
||||
run: uvx zizmor --format sarif . > results.sarif
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # v4.31.2
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
5
.github/zizmor.yml
vendored
5
.github/zizmor.yml
vendored
@@ -1,5 +0,0 @@
|
||||
rules:
|
||||
unpinned-uses:
|
||||
config:
|
||||
policies:
|
||||
Homebrew/actions/*: any
|
||||
24
.gitignore
vendored
24
.gitignore
vendored
@@ -1,24 +1,2 @@
|
||||
.env
|
||||
*.deb
|
||||
*.rpm
|
||||
|
||||
# Additional package locks
|
||||
pnpm-lock.yaml
|
||||
yarn.lock
|
||||
|
||||
# Go binaries and test artifacts
|
||||
main
|
||||
*.test
|
||||
|
||||
node_modules
|
||||
|
||||
# MacOS
|
||||
.DS_store
|
||||
|
||||
# Intellij
|
||||
.idea
|
||||
|
||||
# how does this get here
|
||||
doc/VERSION
|
||||
|
||||
web/static/locales/*.json
|
||||
*.rpm
|
||||
11
.vscode/extensions.json
vendored
11
.vscode/extensions.json
vendored
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"esbenp.prettier-vscode",
|
||||
"ms-azuretools.vscode-containers",
|
||||
"golang.go",
|
||||
"unifiedjs.vscode-mdx",
|
||||
"a-h.templ",
|
||||
"redhat.vscode-yaml",
|
||||
"streetsidesoftware.code-spell-checker"
|
||||
]
|
||||
}
|
||||
27
.vscode/launch.json
vendored
27
.vscode/launch.json
vendored
@@ -1,27 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch Package",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "auto",
|
||||
"program": "${fileDirname}"
|
||||
},
|
||||
{
|
||||
"name": "Anubis [dev]",
|
||||
"command": "npm run dev",
|
||||
"request": "launch",
|
||||
"type": "node-terminal"
|
||||
},
|
||||
{
|
||||
"name": "Start Docs",
|
||||
"command": "cd docs && npm ci && npm run start",
|
||||
"request": "launch",
|
||||
"type": "node-terminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
34
.vscode/settings.json
vendored
34
.vscode/settings.json
vendored
@@ -1,34 +0,0 @@
|
||||
{
|
||||
"github.copilot.enable": {
|
||||
"*": false,
|
||||
"plaintext": false,
|
||||
"markdown": false,
|
||||
"mdx": false,
|
||||
"json": false,
|
||||
"scminput": false,
|
||||
"yaml": false,
|
||||
"go": false,
|
||||
"zig": false,
|
||||
"javascript": false,
|
||||
"properties": false
|
||||
},
|
||||
"[markdown]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[mdx]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"[nunjucks]": {
|
||||
"editor.wordWrap": "wordWrapColumn",
|
||||
"editor.wordWrapColumn": 80,
|
||||
"editor.wordBasedSuggestions": "off"
|
||||
},
|
||||
"cSpell.enabledFileTypes": {
|
||||
"mdx": true,
|
||||
"md": true
|
||||
}
|
||||
}
|
||||
5
Brewfile
5
Brewfile
@@ -1,7 +1,4 @@
|
||||
# programming languages
|
||||
brew "go@1.24"
|
||||
brew "node"
|
||||
brew "ko"
|
||||
brew "esbuild"
|
||||
brew "zstd"
|
||||
brew "brotli"
|
||||
brew "ko"
|
||||
23
Dockerfile
Normal file
23
Dockerfile
Normal file
@@ -0,0 +1,23 @@
|
||||
FROM docker.io/library/golang:1.24 AS build
|
||||
ARG BUILDKIT_SBOM_SCAN_CONTEXT=true BUILDKIT_SBOM_SCAN_STAGE=true
|
||||
|
||||
WORKDIR /app
|
||||
COPY go.mod go.sum /app/
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN --mount=type=cache,target=/root/.cache \
|
||||
VERSION=$(git describe --tags --always --dirty) \
|
||||
&& go build -o /app/bin/anubis -ldflags="-X github.com/TecharoHQ/anubis.Version=${VERSION}" ./cmd/anubis
|
||||
|
||||
FROM docker.io/library/debian:bookworm AS runtime
|
||||
ARG BUILDKIT_SBOM_SCAN_STAGE=true
|
||||
RUN apt-get update \
|
||||
&& apt-get -y install ca-certificates
|
||||
|
||||
COPY --from=build /app/bin/anubis /app/bin/anubis
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 CMD ["/app/bin/anubis", "--healthcheck"]
|
||||
CMD ["/app/bin/anubis"]
|
||||
|
||||
LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis"
|
||||
34
Makefile
34
Makefile
@@ -1,34 +0,0 @@
|
||||
VERSION= $(shell cat ./VERSION)
|
||||
GO?= go
|
||||
NPM?= npm
|
||||
|
||||
.PHONY: build assets deps lint prebaked-build test
|
||||
|
||||
all: build
|
||||
|
||||
deps:
|
||||
$(NPM) ci
|
||||
$(GO) mod download
|
||||
|
||||
assets: PATH:=$(PWD)/node_modules/.bin:$(PATH)
|
||||
assets: deps
|
||||
$(GO) generate ./...
|
||||
./web/build.sh
|
||||
./xess/build.sh
|
||||
|
||||
build: assets
|
||||
$(GO) build -o ./var/anubis ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy ./cmd/robots2policy
|
||||
@echo "Anubis is now built to ./var/anubis"
|
||||
|
||||
lint: assets
|
||||
$(GO) vet ./...
|
||||
$(GO) tool staticcheck ./...
|
||||
$(GO) tool govulncheck ./...
|
||||
|
||||
prebaked-build:
|
||||
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
|
||||
$(GO) build -o ./var/robots2policy -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/robots2policy
|
||||
|
||||
test: assets
|
||||
$(GO) test ./...
|
||||
6
PULL_REQUEST_TEMPLATE.md
Normal file
6
PULL_REQUEST_TEMPLATE.md
Normal file
@@ -0,0 +1,6 @@
|
||||
<!-- delete me and describe your change here -->
|
||||
|
||||
Checklist:
|
||||
|
||||
- [ ] Added a description of the changes to the `[Unreleased]` section of docs/docs/CHANGELOG.md
|
||||
- [ ] Tested this at least manually
|
||||
80
README.md
80
README.md
@@ -1,7 +1,7 @@
|
||||
# Anubis
|
||||
|
||||
<center>
|
||||
<img width=256 src="./web/static/img/happy.webp" alt="A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up" />
|
||||
<img width=256 src="./cmd/anubis/static/img/happy.webp" alt="A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up" />
|
||||
</center>
|
||||
|
||||

|
||||
@@ -9,89 +9,23 @@
|
||||

|
||||

|
||||

|
||||
[](https://github.com/sponsors/Xe)
|
||||
|
||||
## Sponsors
|
||||
Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots.
|
||||
|
||||
Anubis is brought to you by sponsors and donors like:
|
||||
Installing and using this will likely result in your website not being indexed by some search engines. This is considered a feature of Anubis, not a bug.
|
||||
|
||||
### Diamond Tier
|
||||
|
||||
<a href="https://www.raptorcs.com/content/base/products.html">
|
||||
<img src="./docs/static/img/sponsors/raptor-computing-logo.webp" alt="Raptor Computing Systems" height=64 />
|
||||
</a>
|
||||
|
||||
### Gold Tier
|
||||
|
||||
<a href="https://distrust.co?utm_campaign=github&utm_medium=referral&utm_content=anubis">
|
||||
<img src="./docs/static/img/sponsors/distrust-logo.webp" alt="Distrust" height="64">
|
||||
</a>
|
||||
<a href="https://terminaltrove.com/?utm_campaign=github&utm_medium=referral&utm_content=anubis&utm_source=abgh">
|
||||
<img src="./docs/static/img/sponsors/terminal-trove.webp" alt="Terminal Trove" height="64">
|
||||
</a>
|
||||
<a href="https://canine.tools?utm_campaign=github&utm_medium=referral&utm_content=anubis">
|
||||
<img src="./docs/static/img/sponsors/caninetools-logo.webp" alt="canine.tools" height="64">
|
||||
</a>
|
||||
<a href="https://weblate.org/">
|
||||
<img src="./docs/static/img/sponsors/weblate-logo.webp" alt="Weblate" height="64">
|
||||
</a>
|
||||
<a href="https://uberspace.de/">
|
||||
<img src="./docs/static/img/sponsors/uberspace-logo.webp" alt="Uberspace" height="64">
|
||||
</a>
|
||||
<a href="https://wildbase.xyz/">
|
||||
<img src="./docs/static/img/sponsors/wildbase-logo.webp" alt="Wildbase" height="64">
|
||||
</a>
|
||||
<a href="https://emma.pet">
|
||||
<img
|
||||
src="./docs/static/img/sponsors/nepeat-logo.webp"
|
||||
alt="Cat eyes over the word Emma in a serif font"
|
||||
height="64"
|
||||
/>
|
||||
</a>
|
||||
<a href="https://fabulous.systems/">
|
||||
<img
|
||||
src="./docs/static/img/sponsors/fabulous-systems.webp"
|
||||
alt="Cat eyes over the word Emma in a serif font"
|
||||
height="64"
|
||||
/>
|
||||
</a>
|
||||
|
||||
## Overview
|
||||
|
||||
Anubis is a Web AI Firewall Utility that [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using one or more challenges in order to protect upstream resources from scraper bots.
|
||||
|
||||
This program is designed to help protect the small internet from the endless storm of requests that flood in from AI companies. Anubis is as lightweight as possible to ensure that everyone can afford to protect the communities closest to them.
|
||||
|
||||
Anubis is a bit of a nuclear response. This will result in your website being blocked from smaller scrapers and may inhibit "good bots" like the Internet Archive. You can configure [bot policy definitions](./docs/docs/admin/policies.mdx) to explicitly allowlist them and we are working on a curated set of "known good" bots to allow for a compromise between discoverability and uptime.
|
||||
This is a bit of a nuclear response, but AI scraper bots scraping so aggressively have forced my hand. I hate that I have to do this, but this is what we get for the modern Internet because bots don't conform to standards like robots.txt, even when they claim to.
|
||||
|
||||
In most cases, you should not need this and can probably get by using Cloudflare to protect a given origin. However, for circumstances where you can't or won't use Cloudflare, Anubis is there for you.
|
||||
|
||||
If you want to try this out, visit the Anubis documentation site at [anubis.techaro.lol](https://anubis.techaro.lol).
|
||||
If you want to try this out, connect to [anubis.techaro.lol](https://anubis.techaro.lol).
|
||||
|
||||
## Support
|
||||
|
||||
If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue). Please include all the information I would need to diagnose your issue.
|
||||
If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue) and tag it with the Anubis tag. Please include all the information I would need to diagnose your issue.
|
||||
|
||||
For live chat, please join the [Patreon](https://patreon.com/cadey) and ask in the Patron discord in the channel `#anubis`.
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://www.star-history.com/#TecharoHQ/anubis&Date">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date&theme=dark" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
## Packaging Status
|
||||
|
||||
[](https://repology.org/project/anubis-anti-crawler/versions)
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/TecharoHQ/anubis/graphs/contributors">
|
||||
<img src="https://contrib.rocks/image?repo=TecharoHQ/anubis" />
|
||||
</a>
|
||||
|
||||
Made with [contrib.rocks](https://contrib.rocks).
|
||||
[](https://www.star-history.com/#TecharoHQ/anubis&Date)
|
||||
|
||||
13
SECURITY.md
13
SECURITY.md
@@ -1,13 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
Techaro follows the [Semver 2.0 scheme](https://semver.org/).
|
||||
|
||||
## Supported Versions
|
||||
|
||||
Techaro strives to support the two most recent minor versions of Anubis. Patches to those versions will be published as patch releases.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Email security@techaro.lol with details on the vulnerability and reproduction steps. You will get a response as soon as possible.
|
||||
|
||||
Please take care to send your email as a mixed plaintext and HTML message. Messages with GPG signatures or that are plaintext only may be blocked by the spam filter.
|
||||
44
anubis.go
44
anubis.go
@@ -1,44 +0,0 @@
|
||||
// Package anubis contains the version number of Anubis.
|
||||
package anubis
|
||||
|
||||
import "time"
|
||||
|
||||
// Version is the current version of Anubis.
|
||||
//
|
||||
// This variable is set at build time using the -X linker flag. If not set,
|
||||
// it defaults to "devel".
|
||||
var Version = "devel"
|
||||
|
||||
// CookieName is the name of the cookie that Anubis uses in order to validate
|
||||
// access.
|
||||
var CookieName = "techaro.lol-anubis"
|
||||
|
||||
// TestCookieName is the name of the cookie that Anubis uses in order to check
|
||||
// if cookies are enabled on the client's browser.
|
||||
var TestCookieName = "techaro.lol-anubis-cookie-verification"
|
||||
|
||||
// CookieDefaultExpirationTime is the amount of time before the cookie/JWT expires.
|
||||
const CookieDefaultExpirationTime = 7 * 24 * time.Hour
|
||||
|
||||
// BasePrefix is a global prefix for all Anubis endpoints. Can be emptied to remove the prefix entirely.
|
||||
var BasePrefix = ""
|
||||
|
||||
// PublicUrl is the externally accessible URL for this Anubis instance.
|
||||
var PublicUrl = ""
|
||||
|
||||
// StaticPath is the location where all static Anubis assets are located.
|
||||
const StaticPath = "/.within.website/x/cmd/anubis/"
|
||||
|
||||
// APIPrefix is the location where all Anubis API endpoints are located.
|
||||
const APIPrefix = "/.within.website/x/cmd/anubis/api/"
|
||||
|
||||
// DefaultDifficulty is the default "difficulty" (number of leading zeroes)
|
||||
// that must be met by the client in order to pass the challenge.
|
||||
const DefaultDifficulty = 4
|
||||
|
||||
// ForcedLanguage is the language being used instead of the one of the request's Accept-Language header
|
||||
// if being set.
|
||||
var ForcedLanguage = ""
|
||||
|
||||
// UseSimplifiedExplanation can be set to true for using the simplified explanation
|
||||
var UseSimplifiedExplanation = false
|
||||
5
cmd/anubis/CHANGELOG.md
Normal file
5
cmd/anubis/CHANGELOG.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# CHANGELOG
|
||||
|
||||
## 2025-01-24
|
||||
|
||||
- Added support for custom bot policy documentation, allowing administrators to change how Anubis works to meet their needs.
|
||||
409
cmd/anubis/botPolicies.json
Normal file
409
cmd/anubis/botPolicies.json
Normal file
@@ -0,0 +1,409 @@
|
||||
{
|
||||
"bots": [
|
||||
{
|
||||
"name": "amazonbot",
|
||||
"user_agent_regex": "Amazonbot",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"_comment": "This is based on the BGP routes advertised by AS7941",
|
||||
"name": "internet-archive",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"207.241.224.0/20",
|
||||
"208.70.24.0/21",
|
||||
"2620:0:9c0::/48"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://developers.google.com/static/search/apis/ipranges/googlebot.json",
|
||||
"name": "googlebot",
|
||||
"user_agent_regex": "\\+http\\://www\\.google\\.com/bot\\.html",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"2001:4860:4801:10::/64",
|
||||
"2001:4860:4801:11::/64",
|
||||
"2001:4860:4801:12::/64",
|
||||
"2001:4860:4801:13::/64",
|
||||
"2001:4860:4801:14::/64",
|
||||
"2001:4860:4801:15::/64",
|
||||
"2001:4860:4801:16::/64",
|
||||
"2001:4860:4801:17::/64",
|
||||
"2001:4860:4801:18::/64",
|
||||
"2001:4860:4801:19::/64",
|
||||
"2001:4860:4801:1a::/64",
|
||||
"2001:4860:4801:1b::/64",
|
||||
"2001:4860:4801:1c::/64",
|
||||
"2001:4860:4801:1d::/64",
|
||||
"2001:4860:4801:1e::/64",
|
||||
"2001:4860:4801:1f::/64",
|
||||
"2001:4860:4801:20::/64",
|
||||
"2001:4860:4801:21::/64",
|
||||
"2001:4860:4801:22::/64",
|
||||
"2001:4860:4801:23::/64",
|
||||
"2001:4860:4801:24::/64",
|
||||
"2001:4860:4801:25::/64",
|
||||
"2001:4860:4801:26::/64",
|
||||
"2001:4860:4801:27::/64",
|
||||
"2001:4860:4801:28::/64",
|
||||
"2001:4860:4801:29::/64",
|
||||
"2001:4860:4801:2::/64",
|
||||
"2001:4860:4801:2a::/64",
|
||||
"2001:4860:4801:2b::/64",
|
||||
"2001:4860:4801:2c::/64",
|
||||
"2001:4860:4801:2d::/64",
|
||||
"2001:4860:4801:2e::/64",
|
||||
"2001:4860:4801:2f::/64",
|
||||
"2001:4860:4801:31::/64",
|
||||
"2001:4860:4801:32::/64",
|
||||
"2001:4860:4801:33::/64",
|
||||
"2001:4860:4801:34::/64",
|
||||
"2001:4860:4801:35::/64",
|
||||
"2001:4860:4801:36::/64",
|
||||
"2001:4860:4801:37::/64",
|
||||
"2001:4860:4801:38::/64",
|
||||
"2001:4860:4801:39::/64",
|
||||
"2001:4860:4801:3a::/64",
|
||||
"2001:4860:4801:3b::/64",
|
||||
"2001:4860:4801:3c::/64",
|
||||
"2001:4860:4801:3d::/64",
|
||||
"2001:4860:4801:3e::/64",
|
||||
"2001:4860:4801:40::/64",
|
||||
"2001:4860:4801:41::/64",
|
||||
"2001:4860:4801:42::/64",
|
||||
"2001:4860:4801:43::/64",
|
||||
"2001:4860:4801:44::/64",
|
||||
"2001:4860:4801:45::/64",
|
||||
"2001:4860:4801:46::/64",
|
||||
"2001:4860:4801:47::/64",
|
||||
"2001:4860:4801:48::/64",
|
||||
"2001:4860:4801:49::/64",
|
||||
"2001:4860:4801:4a::/64",
|
||||
"2001:4860:4801:4b::/64",
|
||||
"2001:4860:4801:4c::/64",
|
||||
"2001:4860:4801:50::/64",
|
||||
"2001:4860:4801:51::/64",
|
||||
"2001:4860:4801:52::/64",
|
||||
"2001:4860:4801:53::/64",
|
||||
"2001:4860:4801:54::/64",
|
||||
"2001:4860:4801:55::/64",
|
||||
"2001:4860:4801:56::/64",
|
||||
"2001:4860:4801:60::/64",
|
||||
"2001:4860:4801:61::/64",
|
||||
"2001:4860:4801:62::/64",
|
||||
"2001:4860:4801:63::/64",
|
||||
"2001:4860:4801:64::/64",
|
||||
"2001:4860:4801:65::/64",
|
||||
"2001:4860:4801:66::/64",
|
||||
"2001:4860:4801:67::/64",
|
||||
"2001:4860:4801:68::/64",
|
||||
"2001:4860:4801:69::/64",
|
||||
"2001:4860:4801:6a::/64",
|
||||
"2001:4860:4801:6b::/64",
|
||||
"2001:4860:4801:6c::/64",
|
||||
"2001:4860:4801:6d::/64",
|
||||
"2001:4860:4801:6e::/64",
|
||||
"2001:4860:4801:6f::/64",
|
||||
"2001:4860:4801:70::/64",
|
||||
"2001:4860:4801:71::/64",
|
||||
"2001:4860:4801:72::/64",
|
||||
"2001:4860:4801:73::/64",
|
||||
"2001:4860:4801:74::/64",
|
||||
"2001:4860:4801:75::/64",
|
||||
"2001:4860:4801:76::/64",
|
||||
"2001:4860:4801:77::/64",
|
||||
"2001:4860:4801:78::/64",
|
||||
"2001:4860:4801:79::/64",
|
||||
"2001:4860:4801:80::/64",
|
||||
"2001:4860:4801:81::/64",
|
||||
"2001:4860:4801:82::/64",
|
||||
"2001:4860:4801:83::/64",
|
||||
"2001:4860:4801:84::/64",
|
||||
"2001:4860:4801:85::/64",
|
||||
"2001:4860:4801:86::/64",
|
||||
"2001:4860:4801:87::/64",
|
||||
"2001:4860:4801:88::/64",
|
||||
"2001:4860:4801:90::/64",
|
||||
"2001:4860:4801:91::/64",
|
||||
"2001:4860:4801:92::/64",
|
||||
"2001:4860:4801:93::/64",
|
||||
"2001:4860:4801:94::/64",
|
||||
"2001:4860:4801:95::/64",
|
||||
"2001:4860:4801:96::/64",
|
||||
"2001:4860:4801:a0::/64",
|
||||
"2001:4860:4801:a1::/64",
|
||||
"2001:4860:4801:a2::/64",
|
||||
"2001:4860:4801:a3::/64",
|
||||
"2001:4860:4801:a4::/64",
|
||||
"2001:4860:4801:a5::/64",
|
||||
"2001:4860:4801:c::/64",
|
||||
"2001:4860:4801:f::/64",
|
||||
"192.178.5.0/27",
|
||||
"192.178.6.0/27",
|
||||
"192.178.6.128/27",
|
||||
"192.178.6.160/27",
|
||||
"192.178.6.192/27",
|
||||
"192.178.6.32/27",
|
||||
"192.178.6.64/27",
|
||||
"192.178.6.96/27",
|
||||
"34.100.182.96/28",
|
||||
"34.101.50.144/28",
|
||||
"34.118.254.0/28",
|
||||
"34.118.66.0/28",
|
||||
"34.126.178.96/28",
|
||||
"34.146.150.144/28",
|
||||
"34.147.110.144/28",
|
||||
"34.151.74.144/28",
|
||||
"34.152.50.64/28",
|
||||
"34.154.114.144/28",
|
||||
"34.155.98.32/28",
|
||||
"34.165.18.176/28",
|
||||
"34.175.160.64/28",
|
||||
"34.176.130.16/28",
|
||||
"34.22.85.0/27",
|
||||
"34.64.82.64/28",
|
||||
"34.65.242.112/28",
|
||||
"34.80.50.80/28",
|
||||
"34.88.194.0/28",
|
||||
"34.89.10.80/28",
|
||||
"34.89.198.80/28",
|
||||
"34.96.162.48/28",
|
||||
"35.247.243.240/28",
|
||||
"66.249.64.0/27",
|
||||
"66.249.64.128/27",
|
||||
"66.249.64.160/27",
|
||||
"66.249.64.224/27",
|
||||
"66.249.64.32/27",
|
||||
"66.249.64.64/27",
|
||||
"66.249.64.96/27",
|
||||
"66.249.65.0/27",
|
||||
"66.249.65.128/27",
|
||||
"66.249.65.160/27",
|
||||
"66.249.65.192/27",
|
||||
"66.249.65.224/27",
|
||||
"66.249.65.32/27",
|
||||
"66.249.65.64/27",
|
||||
"66.249.65.96/27",
|
||||
"66.249.66.0/27",
|
||||
"66.249.66.128/27",
|
||||
"66.249.66.160/27",
|
||||
"66.249.66.192/27",
|
||||
"66.249.66.224/27",
|
||||
"66.249.66.32/27",
|
||||
"66.249.66.64/27",
|
||||
"66.249.66.96/27",
|
||||
"66.249.68.0/27",
|
||||
"66.249.68.128/27",
|
||||
"66.249.68.32/27",
|
||||
"66.249.68.64/27",
|
||||
"66.249.68.96/27",
|
||||
"66.249.69.0/27",
|
||||
"66.249.69.128/27",
|
||||
"66.249.69.160/27",
|
||||
"66.249.69.192/27",
|
||||
"66.249.69.224/27",
|
||||
"66.249.69.32/27",
|
||||
"66.249.69.64/27",
|
||||
"66.249.69.96/27",
|
||||
"66.249.70.0/27",
|
||||
"66.249.70.128/27",
|
||||
"66.249.70.160/27",
|
||||
"66.249.70.192/27",
|
||||
"66.249.70.224/27",
|
||||
"66.249.70.32/27",
|
||||
"66.249.70.64/27",
|
||||
"66.249.70.96/27",
|
||||
"66.249.71.0/27",
|
||||
"66.249.71.128/27",
|
||||
"66.249.71.160/27",
|
||||
"66.249.71.192/27",
|
||||
"66.249.71.224/27",
|
||||
"66.249.71.32/27",
|
||||
"66.249.71.64/27",
|
||||
"66.249.71.96/27",
|
||||
"66.249.72.0/27",
|
||||
"66.249.72.128/27",
|
||||
"66.249.72.160/27",
|
||||
"66.249.72.192/27",
|
||||
"66.249.72.224/27",
|
||||
"66.249.72.32/27",
|
||||
"66.249.72.64/27",
|
||||
"66.249.72.96/27",
|
||||
"66.249.73.0/27",
|
||||
"66.249.73.128/27",
|
||||
"66.249.73.160/27",
|
||||
"66.249.73.192/27",
|
||||
"66.249.73.224/27",
|
||||
"66.249.73.32/27",
|
||||
"66.249.73.64/27",
|
||||
"66.249.73.96/27",
|
||||
"66.249.74.0/27",
|
||||
"66.249.74.128/27",
|
||||
"66.249.74.160/27",
|
||||
"66.249.74.192/27",
|
||||
"66.249.74.32/27",
|
||||
"66.249.74.64/27",
|
||||
"66.249.74.96/27",
|
||||
"66.249.75.0/27",
|
||||
"66.249.75.128/27",
|
||||
"66.249.75.160/27",
|
||||
"66.249.75.192/27",
|
||||
"66.249.75.224/27",
|
||||
"66.249.75.32/27",
|
||||
"66.249.75.64/27",
|
||||
"66.249.75.96/27",
|
||||
"66.249.76.0/27",
|
||||
"66.249.76.128/27",
|
||||
"66.249.76.160/27",
|
||||
"66.249.76.192/27",
|
||||
"66.249.76.224/27",
|
||||
"66.249.76.32/27",
|
||||
"66.249.76.64/27",
|
||||
"66.249.76.96/27",
|
||||
"66.249.77.0/27",
|
||||
"66.249.77.128/27",
|
||||
"66.249.77.160/27",
|
||||
"66.249.77.192/27",
|
||||
"66.249.77.224/27",
|
||||
"66.249.77.32/27",
|
||||
"66.249.77.64/27",
|
||||
"66.249.77.96/27",
|
||||
"66.249.78.0/27",
|
||||
"66.249.78.32/27",
|
||||
"66.249.79.0/27",
|
||||
"66.249.79.128/27",
|
||||
"66.249.79.160/27",
|
||||
"66.249.79.192/27",
|
||||
"66.249.79.224/27",
|
||||
"66.249.79.32/27",
|
||||
"66.249.79.64/27",
|
||||
"66.249.79.96/27"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://www.bing.com/toolbox/bingbot.json",
|
||||
"name": "bingbot",
|
||||
"user_agent_regex": "\\+http\\://www\\.bing\\.com/bingbot\\.htm",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"157.55.39.0/24",
|
||||
"207.46.13.0/24",
|
||||
"40.77.167.0/24",
|
||||
"13.66.139.0/24",
|
||||
"13.66.144.0/24",
|
||||
"52.167.144.0/24",
|
||||
"13.67.10.16/28",
|
||||
"13.69.66.240/28",
|
||||
"13.71.172.224/28",
|
||||
"139.217.52.0/28",
|
||||
"191.233.204.224/28",
|
||||
"20.36.108.32/28",
|
||||
"20.43.120.16/28",
|
||||
"40.79.131.208/28",
|
||||
"40.79.186.176/28",
|
||||
"52.231.148.0/28",
|
||||
"20.79.107.240/28",
|
||||
"51.105.67.0/28",
|
||||
"20.125.163.80/28",
|
||||
"40.77.188.0/22",
|
||||
"65.55.210.0/24",
|
||||
"199.30.24.0/23",
|
||||
"40.77.202.0/24",
|
||||
"40.77.139.0/25",
|
||||
"20.74.197.0/28",
|
||||
"20.15.133.160/27",
|
||||
"40.77.177.0/24",
|
||||
"40.77.178.0/23"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json",
|
||||
"name": "qwantbot",
|
||||
"user_agent_regex": "\\+https\\://help\\.qwant\\.com/bot/",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"91.242.162.0/24"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://kagi.com/bot",
|
||||
"name": "kagibot",
|
||||
"user_agent_regex": "\\+https\\://kagi\\.com/bot",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"216.18.205.234/32",
|
||||
"35.212.27.76/32",
|
||||
"104.254.65.50/32",
|
||||
"209.151.156.194/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Received over email from marginalia operator",
|
||||
"name": "marginalia",
|
||||
"user_agent_regex": "search\\.marginalia\\.nu",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"193.183.0.162/31",
|
||||
"193.183.0.164/30",
|
||||
"193.183.0.168/30",
|
||||
"193.183.0.172/31",
|
||||
"193.183.0.174/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"_comment": "Based on: https://www.mojeek.com/bot.html and manual admin confirmation in a GitHub thread: https://github.com/TecharoHQ/anubis/issues/47#issuecomment-2743815019",
|
||||
"name": "mojeekbot",
|
||||
"user_agent_regex": "http\\://www\\.mojeek\\.com/bot\\.html",
|
||||
"action": "ALLOW",
|
||||
"remote_addresses": [
|
||||
"5.102.173.71/32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "us-artificial-intelligence-scraper",
|
||||
"user_agent_regex": "\\+https\\://github\\.com/US-Artificial-Intelligence/scraper",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"name": "well-known",
|
||||
"path_regex": "^/.well-known/.*$",
|
||||
"action": "ALLOW"
|
||||
},
|
||||
{
|
||||
"name": "favicon",
|
||||
"path_regex": "^/favicon.ico$",
|
||||
"action": "ALLOW"
|
||||
},
|
||||
{
|
||||
"name": "robots-txt",
|
||||
"path_regex": "^/robots.txt$",
|
||||
"action": "ALLOW"
|
||||
},
|
||||
{
|
||||
"name": "lightpanda",
|
||||
"user_agent_regex": "^Lightpanda/.*$",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"name": "headless-chrome",
|
||||
"user_agent_regex": "(?i:headlesschrom(e|ium))",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"name": "generic-bot-catchall",
|
||||
"user_agent_regex": "(?i:bot|crawler)",
|
||||
"action": "CHALLENGE",
|
||||
"challenge": {
|
||||
"difficulty": 16,
|
||||
"report_as": 4,
|
||||
"algorithm": "slow"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "generic-browser",
|
||||
"user_agent_regex": "Mozilla",
|
||||
"action": "CHALLENGE"
|
||||
}
|
||||
],
|
||||
"dnsbl": true
|
||||
}
|
||||
87
cmd/anubis/decaymap.go
Normal file
87
cmd/anubis/decaymap.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
func zilch[T any]() T {
|
||||
var zero T
|
||||
return zero
|
||||
}
|
||||
|
||||
// DecayMap is a lazy key->value map. It's a wrapper around a map and a mutex. If values exceed their time-to-live, they are pruned at Get time.
|
||||
type DecayMap[K comparable, V any] struct {
|
||||
data map[K]decayMapEntry[V]
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
type decayMapEntry[V any] struct {
|
||||
Value V
|
||||
expiry time.Time
|
||||
}
|
||||
|
||||
// NewDecayMap creates a new DecayMap of key type K and value type V.
|
||||
//
|
||||
// Key types must be comparable to work with maps.
|
||||
func NewDecayMap[K comparable, V any]() *DecayMap[K, V] {
|
||||
return &DecayMap[K, V]{
|
||||
data: make(map[K]decayMapEntry[V]),
|
||||
}
|
||||
}
|
||||
|
||||
// expire forcibly expires a key by setting its time-to-live one second in the past.
|
||||
func (m *DecayMap[K, V]) expire(key K) bool {
|
||||
m.lock.RLock()
|
||||
val, ok := m.data[key]
|
||||
m.lock.RUnlock()
|
||||
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
m.lock.Lock()
|
||||
val.expiry = time.Now().Add(-1 * time.Second)
|
||||
m.data[key] = val
|
||||
m.lock.Unlock()
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Get gets a value from the DecayMap by key.
|
||||
//
|
||||
// If a value has expired, forcibly delete it if it was not updated.
|
||||
func (m *DecayMap[K, V]) Get(key K) (V, bool) {
|
||||
m.lock.RLock()
|
||||
value, ok := m.data[key]
|
||||
m.lock.RUnlock()
|
||||
|
||||
if !ok {
|
||||
return zilch[V](), false
|
||||
}
|
||||
|
||||
if time.Now().After(value.expiry) {
|
||||
m.lock.Lock()
|
||||
// Since previously reading m.data[key], the value may have been updated.
|
||||
// Delete the entry only if the expiry time is still the same.
|
||||
if m.data[key].expiry == value.expiry {
|
||||
delete(m.data, key)
|
||||
}
|
||||
m.lock.Unlock()
|
||||
|
||||
return zilch[V](), false
|
||||
}
|
||||
|
||||
return value.Value, true
|
||||
}
|
||||
|
||||
// Set sets a key value pair in the map.
|
||||
func (m *DecayMap[K, V]) Set(key K, value V, ttl time.Duration) {
|
||||
m.lock.Lock()
|
||||
defer m.lock.Unlock()
|
||||
|
||||
m.data[key] = decayMapEntry[V]{
|
||||
Value: value,
|
||||
expiry: time.Now().Add(ttl),
|
||||
}
|
||||
}
|
||||
31
cmd/anubis/decaymap_test.go
Normal file
31
cmd/anubis/decaymap_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestDecayMap(t *testing.T) {
|
||||
dm := NewDecayMap[string, string]()
|
||||
|
||||
dm.Set("test", "hi", 5*time.Minute)
|
||||
|
||||
val, ok := dm.Get("test")
|
||||
if !ok {
|
||||
t.Error("somehow the test key was not set")
|
||||
}
|
||||
|
||||
if val != "hi" {
|
||||
t.Errorf("wanted value %q, got: %q", "hi", val)
|
||||
}
|
||||
|
||||
ok = dm.expire("test")
|
||||
if !ok {
|
||||
t.Error("somehow could not force-expire the test key")
|
||||
}
|
||||
|
||||
_, ok = dm.Get("test")
|
||||
if ok {
|
||||
t.Error("got value even though it was supposed to be expired")
|
||||
}
|
||||
}
|
||||
217
cmd/anubis/index.templ
Normal file
217
cmd/anubis/index.templ
Normal file
@@ -0,0 +1,217 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/xess"
|
||||
)
|
||||
|
||||
templ base(title string, body templ.Component) {
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>{ title }</title>
|
||||
<link rel="stylesheet" href={ xess.URL }/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
||||
<style>
|
||||
body,
|
||||
html {
|
||||
height: 100%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
.centered-div {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.lds-roller,
|
||||
.lds-roller div,
|
||||
.lds-roller div:after {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.lds-roller {
|
||||
display: inline-block;
|
||||
position: relative;
|
||||
width: 80px;
|
||||
height: 80px;
|
||||
}
|
||||
|
||||
.lds-roller div {
|
||||
animation: lds-roller 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;
|
||||
transform-origin: 40px 40px;
|
||||
}
|
||||
|
||||
.lds-roller div:after {
|
||||
content: " ";
|
||||
display: block;
|
||||
position: absolute;
|
||||
width: 7.2px;
|
||||
height: 7.2px;
|
||||
border-radius: 50%;
|
||||
background: currentColor;
|
||||
margin: -3.6px 0 0 -3.6px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(1) {
|
||||
animation-delay: -0.036s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(1):after {
|
||||
top: 62.62742px;
|
||||
left: 62.62742px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(2) {
|
||||
animation-delay: -0.072s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(2):after {
|
||||
top: 67.71281px;
|
||||
left: 56px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(3) {
|
||||
animation-delay: -0.108s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(3):after {
|
||||
top: 70.90963px;
|
||||
left: 48.28221px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(4) {
|
||||
animation-delay: -0.144s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(4):after {
|
||||
top: 72px;
|
||||
left: 40px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(5) {
|
||||
animation-delay: -0.18s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(5):after {
|
||||
top: 70.90963px;
|
||||
left: 31.71779px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(6) {
|
||||
animation-delay: -0.216s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(6):after {
|
||||
top: 67.71281px;
|
||||
left: 24px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(7) {
|
||||
animation-delay: -0.252s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(7):after {
|
||||
top: 62.62742px;
|
||||
left: 17.37258px;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(8) {
|
||||
animation-delay: -0.288s;
|
||||
}
|
||||
|
||||
.lds-roller div:nth-child(8):after {
|
||||
top: 56px;
|
||||
left: 12.28719px;
|
||||
}
|
||||
|
||||
@keyframes lds-roller {
|
||||
0% {
|
||||
transform: rotate(0deg);
|
||||
}
|
||||
|
||||
100% {
|
||||
transform: rotate(360deg);
|
||||
}
|
||||
}
|
||||
</style>
|
||||
@templ.JSONScript("anubis_version", anubis.Version)
|
||||
</head>
|
||||
<body id="top">
|
||||
<main>
|
||||
<center>
|
||||
<h1 id="title" class=".centered-div">{ title }</h1>
|
||||
</center>
|
||||
@body
|
||||
<footer>
|
||||
<center>
|
||||
<p>
|
||||
Protected by <a href="https://github.com/TecharoHQ/anubis">Anubis</a> from <a
|
||||
href="https://techaro.lol"
|
||||
>Techaro</a>. Made with ❤️ in 🇨🇦.
|
||||
</p>
|
||||
</center>
|
||||
</footer>
|
||||
</main>
|
||||
</body>
|
||||
</html>
|
||||
}
|
||||
|
||||
templ index() {
|
||||
<div class="centered-div">
|
||||
<img
|
||||
id="image"
|
||||
style="width:100%;max-width:256px;"
|
||||
src={ "/.within.website/x/cmd/anubis/static/img/pensive.webp?cacheBuster=" +
|
||||
anubis.Version }
|
||||
/>
|
||||
<img
|
||||
style="display:none;"
|
||||
style="width:100%;max-width:256px;"
|
||||
src={ "/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" +
|
||||
anubis.Version }
|
||||
/>
|
||||
<p id="status">Loading...</p>
|
||||
<script async type="module" src={ "/.within.website/x/cmd/anubis/static/js/main.mjs?cacheBuster=" + anubis.Version }></script>
|
||||
<div id="spinner" class="lds-roller">
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
<div></div>
|
||||
</div>
|
||||
<details>
|
||||
<summary>Why am I seeing this?</summary>
|
||||
<p>You are seeing this because the administrator of this website has set up <a href="https://github.com/TecharoHQ/anubis">Anubis</a> to protect the server against the scourge of <a href="https://thelibre.news/foss-infrastructure-is-under-attack-by-ai-companies/">AI companies aggressively scraping websites</a>. This can and does cause downtime for the websites, which makes their resources inaccessible for everyone.</p>
|
||||
<p>Anubis is a compromise. Anubis uses a <a href="https://anubis.techaro.lol/docs/design/why-proof-of-work">Proof-of-Work</a> scheme in the vein of <a href="https://en.wikipedia.org/wiki/Hashcash">Hashcash</a>, a proposed proof-of-work scheme for reducing email spam. The idea is that at individual scales the additional load is ignorable, but at mass scraper levels it adds up and makes scraping much more expensive.</p>
|
||||
<p>Ultimately, this is a hack whose real purpose is to give a "good enough" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG: via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to users that are much more likely to be legitimate.</p>
|
||||
<p>Please note that Anubis requires the use of modern JavaScript features that plugins like <a href="https://jshelter.org/">JShelter</a> will disable. Please disable JShelter or other such plugins for this domain.</p>
|
||||
</details>
|
||||
<noscript>
|
||||
<p>
|
||||
Sadly, you must enable JavaScript to get past this challenge. This is required because AI companies have changed
|
||||
the social contract around how website hosting works. A no-JS solution is a work-in-progress.
|
||||
</p>
|
||||
</noscript>
|
||||
<div id="testarea"></div>
|
||||
</div>
|
||||
}
|
||||
|
||||
templ errorPage(message string) {
|
||||
<div class="centered-div">
|
||||
<img
|
||||
id="image"
|
||||
style="width:100%;max-width:256px;"
|
||||
src={ "/.within.website/x/cmd/anubis/static/img/sad.webp?cacheBuster=" + anubis.Version }
|
||||
/>
|
||||
<p>{ message }.</p>
|
||||
<button onClick="window.location.reload();">Try again</button>
|
||||
<p><a href="/">Go home</a></p>
|
||||
</div>
|
||||
}
|
||||
225
cmd/anubis/index_templ.go
Normal file
225
cmd/anubis/index_templ.go
Normal file
@@ -0,0 +1,225 @@
|
||||
// Code generated by templ - DO NOT EDIT.
|
||||
|
||||
// templ: version: v0.3.833
|
||||
package main
|
||||
|
||||
//lint:file-ignore SA4006 This context is only used if a nested component is present.
|
||||
|
||||
import "github.com/a-h/templ"
|
||||
import templruntime "github.com/a-h/templ/runtime"
|
||||
|
||||
import (
|
||||
"github.com/TecharoHQ/anubis"
|
||||
"github.com/TecharoHQ/anubis/xess"
|
||||
)
|
||||
|
||||
func base(title string, body templ.Component) templ.Component {
|
||||
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
|
||||
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
|
||||
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
|
||||
return templ_7745c5c3_CtxErr
|
||||
}
|
||||
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
|
||||
if !templ_7745c5c3_IsBuffer {
|
||||
defer func() {
|
||||
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
|
||||
if templ_7745c5c3_Err == nil {
|
||||
templ_7745c5c3_Err = templ_7745c5c3_BufErr
|
||||
}
|
||||
}()
|
||||
}
|
||||
ctx = templ.InitializeContext(ctx)
|
||||
templ_7745c5c3_Var1 := templ.GetChildren(ctx)
|
||||
if templ_7745c5c3_Var1 == nil {
|
||||
templ_7745c5c3_Var1 = templ.NopComponent
|
||||
}
|
||||
ctx = templ.ClearChildren(ctx)
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 1, "<!doctype html><html><head><title>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var2 string
|
||||
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 12, Col: 17}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 2, "</title><link rel=\"stylesheet\" href=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var3 string
|
||||
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(xess.URL)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 13, Col: 41}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 3, "\"><meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\"><style>\n body,\n html {\n height: 100%;\n display: flex;\n justify-content: center;\n align-items: center;\n margin-left: auto;\n margin-right: auto;\n }\n\n .centered-div {\n text-align: center;\n }\n\n .lds-roller,\n .lds-roller div,\n .lds-roller div:after {\n box-sizing: border-box;\n }\n\n .lds-roller {\n display: inline-block;\n position: relative;\n width: 80px;\n height: 80px;\n }\n\n .lds-roller div {\n animation: lds-roller 1.2s cubic-bezier(0.5, 0, 0.5, 1) infinite;\n transform-origin: 40px 40px;\n }\n\n .lds-roller div:after {\n content: \" \";\n display: block;\n position: absolute;\n width: 7.2px;\n height: 7.2px;\n border-radius: 50%;\n background: currentColor;\n margin: -3.6px 0 0 -3.6px;\n }\n\n .lds-roller div:nth-child(1) {\n animation-delay: -0.036s;\n }\n\n .lds-roller div:nth-child(1):after {\n top: 62.62742px;\n left: 62.62742px;\n }\n\n .lds-roller div:nth-child(2) {\n animation-delay: -0.072s;\n }\n\n .lds-roller div:nth-child(2):after {\n top: 67.71281px;\n left: 56px;\n }\n\n .lds-roller div:nth-child(3) {\n animation-delay: -0.108s;\n }\n\n .lds-roller div:nth-child(3):after {\n top: 70.90963px;\n left: 48.28221px;\n }\n\n .lds-roller div:nth-child(4) {\n animation-delay: -0.144s;\n }\n\n .lds-roller div:nth-child(4):after {\n top: 72px;\n left: 40px;\n }\n\n .lds-roller div:nth-child(5) {\n animation-delay: -0.18s;\n }\n\n .lds-roller div:nth-child(5):after {\n top: 70.90963px;\n left: 31.71779px;\n }\n\n .lds-roller div:nth-child(6) {\n animation-delay: -0.216s;\n }\n\n .lds-roller div:nth-child(6):after {\n top: 67.71281px;\n left: 24px;\n }\n\n .lds-roller div:nth-child(7) {\n animation-delay: -0.252s;\n }\n\n .lds-roller div:nth-child(7):after {\n top: 62.62742px;\n left: 17.37258px;\n }\n\n .lds-roller div:nth-child(8) {\n animation-delay: -0.288s;\n }\n\n .lds-roller div:nth-child(8):after {\n top: 56px;\n left: 12.28719px;\n }\n\n @keyframes lds-roller {\n 0% {\n transform: rotate(0deg);\n }\n\n 100% {\n transform: rotate(360deg);\n }\n }\n </style>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templ.JSONScript("anubis_version", anubis.Version).Render(ctx, templ_7745c5c3_Buffer)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 4, "</head><body id=\"top\"><main><center><h1 id=\"title\" class=\".centered-div\">")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var4 string
|
||||
templ_7745c5c3_Var4, templ_7745c5c3_Err = templ.JoinStringErrs(title)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 146, Col: 49}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var4))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 5, "</h1></center>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = body.Render(ctx, templ_7745c5c3_Buffer)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 6, "<footer><center><p>Protected by <a href=\"https://github.com/TecharoHQ/anubis\">Anubis</a> from <a href=\"https://techaro.lol\">Techaro</a>. Made with ❤️ in 🇨🇦.</p></center></footer></main></body></html>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func index() templ.Component {
|
||||
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
|
||||
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
|
||||
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
|
||||
return templ_7745c5c3_CtxErr
|
||||
}
|
||||
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
|
||||
if !templ_7745c5c3_IsBuffer {
|
||||
defer func() {
|
||||
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
|
||||
if templ_7745c5c3_Err == nil {
|
||||
templ_7745c5c3_Err = templ_7745c5c3_BufErr
|
||||
}
|
||||
}()
|
||||
}
|
||||
ctx = templ.InitializeContext(ctx)
|
||||
templ_7745c5c3_Var5 := templ.GetChildren(ctx)
|
||||
if templ_7745c5c3_Var5 == nil {
|
||||
templ_7745c5c3_Var5 = templ.NopComponent
|
||||
}
|
||||
ctx = templ.ClearChildren(ctx)
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 7, "<div class=\"centered-div\"><img id=\"image\" style=\"width:100%;max-width:256px;\" src=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var6 string
|
||||
templ_7745c5c3_Var6, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/pensive.webp?cacheBuster=" +
|
||||
anubis.Version)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 169, Col: 18}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var6))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 8, "\"> <img style=\"display:none;\" style=\"width:100%;max-width:256px;\" src=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var7 string
|
||||
templ_7745c5c3_Var7, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/happy.webp?cacheBuster=" +
|
||||
anubis.Version)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 175, Col: 18}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var7))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 9, "\"><p id=\"status\">Loading...</p><script async type=\"module\" src=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var8 string
|
||||
templ_7745c5c3_Var8, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/js/main.mjs?cacheBuster=" + anubis.Version)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 178, Col: 116}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var8))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></script><div id=\"spinner\" class=\"lds-roller\"><div></div><div></div><div></div><div></div><div></div><div></div><div></div><div></div></div><details><summary>Why am I seeing this?</summary><p>You are seeing this because the administrator of this website has set up <a href=\"https://github.com/TecharoHQ/anubis\">Anubis</a> to protect the server against the scourge of <a href=\"https://thelibre.news/foss-infrastructure-is-under-attack-by-ai-companies/\">AI companies aggressively scraping websites</a>. This can and does cause downtime for the websites, which makes their resources inaccessible for everyone.</p><p>Anubis is a compromise. Anubis uses a <a href=\"https://anubis.techaro.lol/docs/design/why-proof-of-work\">Proof-of-Work</a> scheme in the vein of <a href=\"https://en.wikipedia.org/wiki/Hashcash\">Hashcash</a>, a proposed proof-of-work scheme for reducing email spam. The idea is that at individual scales the additional load is ignorable, but at mass scraper levels it adds up and makes scraping much more expensive.</p><p>Ultimately, this is a hack whose real purpose is to give a \"good enough\" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG: via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to users that are much more likely to be legitimate.</p><p>Please note that Anubis requires the use of modern JavaScript features that plugins like <a href=\"https://jshelter.org/\">JShelter</a> will disable. Please disable JShelter or other such plugins for this domain.</p></details><noscript><p>Sadly, you must enable JavaScript to get past this challenge. This is required because AI companies have changed the social contract around how website hosting works. A no-JS solution is a work-in-progress.</p></noscript><div id=\"testarea\"></div></div>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func errorPage(message string) templ.Component {
|
||||
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
|
||||
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
|
||||
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
|
||||
return templ_7745c5c3_CtxErr
|
||||
}
|
||||
templ_7745c5c3_Buffer, templ_7745c5c3_IsBuffer := templruntime.GetBuffer(templ_7745c5c3_W)
|
||||
if !templ_7745c5c3_IsBuffer {
|
||||
defer func() {
|
||||
templ_7745c5c3_BufErr := templruntime.ReleaseBuffer(templ_7745c5c3_Buffer)
|
||||
if templ_7745c5c3_Err == nil {
|
||||
templ_7745c5c3_Err = templ_7745c5c3_BufErr
|
||||
}
|
||||
}()
|
||||
}
|
||||
ctx = templ.InitializeContext(ctx)
|
||||
templ_7745c5c3_Var9 := templ.GetChildren(ctx)
|
||||
if templ_7745c5c3_Var9 == nil {
|
||||
templ_7745c5c3_Var9 = templ.NopComponent
|
||||
}
|
||||
ctx = templ.ClearChildren(ctx)
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 11, "<div class=\"centered-div\"><img id=\"image\" style=\"width:100%;max-width:256px;\" src=\"")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var10 string
|
||||
templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs("/.within.website/x/cmd/anubis/static/img/sad.webp?cacheBuster=" + anubis.Version)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 211, Col: 90}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, "\"><p>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
var templ_7745c5c3_Var11 string
|
||||
templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(message)
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 213, Col: 14}
|
||||
}
|
||||
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, ".</p><button onClick=\"window.location.reload();\">Try again</button><p><a href=\"/\">Go home</a></p></div>")
|
||||
if templ_7745c5c3_Err != nil {
|
||||
return templ_7745c5c3_Err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
var _ = templruntime.GeneratedTemplate
|
||||
162
cmd/anubis/internal/config/config.go
Normal file
162
cmd/anubis/internal/config/config.go
Normal file
@@ -0,0 +1,162 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
type Rule string
|
||||
|
||||
const (
|
||||
RuleUnknown Rule = ""
|
||||
RuleAllow Rule = "ALLOW"
|
||||
RuleDeny Rule = "DENY"
|
||||
RuleChallenge Rule = "CHALLENGE"
|
||||
)
|
||||
|
||||
type Algorithm string
|
||||
|
||||
const (
|
||||
AlgorithmUnknown Algorithm = ""
|
||||
AlgorithmFast Algorithm = "fast"
|
||||
AlgorithmSlow Algorithm = "slow"
|
||||
)
|
||||
|
||||
type Bot struct {
|
||||
Name string `json:"name"`
|
||||
UserAgentRegex *string `json:"user_agent_regex"`
|
||||
PathRegex *string `json:"path_regex"`
|
||||
Action Rule `json:"action"`
|
||||
RemoteAddr []string `json:"remote_addresses"`
|
||||
Challenge *ChallengeRules `json:"challenge,omitempty"`
|
||||
}
|
||||
|
||||
var (
|
||||
ErrNoBotRulesDefined = errors.New("config: must define at least one (1) bot rule")
|
||||
ErrBotMustHaveName = errors.New("config.Bot: must set name")
|
||||
ErrBotMustHaveUserAgentOrPath = errors.New("config.Bot: must set either user_agent_regex, path_regex, or remote_addresses")
|
||||
ErrBotMustHaveUserAgentOrPathNotBoth = errors.New("config.Bot: must set either user_agent_regex, path_regex, and not both")
|
||||
ErrUnknownAction = errors.New("config.Bot: unknown action")
|
||||
ErrInvalidUserAgentRegex = errors.New("config.Bot: invalid user agent regex")
|
||||
ErrInvalidPathRegex = errors.New("config.Bot: invalid path regex")
|
||||
ErrInvalidCIDR = errors.New("config.Bot: invalid CIDR")
|
||||
)
|
||||
|
||||
func (b Bot) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if b.Name == "" {
|
||||
errs = append(errs, ErrBotMustHaveName)
|
||||
}
|
||||
|
||||
if b.UserAgentRegex == nil && b.PathRegex == nil && (b.RemoteAddr == nil || len(b.RemoteAddr) == 0) {
|
||||
errs = append(errs, ErrBotMustHaveUserAgentOrPath)
|
||||
}
|
||||
|
||||
if b.UserAgentRegex != nil && b.PathRegex != nil {
|
||||
errs = append(errs, ErrBotMustHaveUserAgentOrPathNotBoth)
|
||||
}
|
||||
|
||||
if b.UserAgentRegex != nil {
|
||||
if _, err := regexp.Compile(*b.UserAgentRegex); err != nil {
|
||||
errs = append(errs, ErrInvalidUserAgentRegex, err)
|
||||
}
|
||||
}
|
||||
|
||||
if b.PathRegex != nil {
|
||||
if _, err := regexp.Compile(*b.PathRegex); err != nil {
|
||||
errs = append(errs, ErrInvalidPathRegex, err)
|
||||
}
|
||||
}
|
||||
|
||||
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
|
||||
for _, cidr := range b.RemoteAddr {
|
||||
if _, _, err := net.ParseCIDR(cidr); err != nil {
|
||||
errs = append(errs, ErrInvalidCIDR, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch b.Action {
|
||||
case RuleAllow, RuleChallenge, RuleDeny:
|
||||
// okay
|
||||
default:
|
||||
errs = append(errs, fmt.Errorf("%w: %q", ErrUnknownAction, b.Action))
|
||||
}
|
||||
|
||||
if b.Action == RuleChallenge && b.Challenge != nil {
|
||||
if err := b.Challenge.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config: bot entry for %q is not valid:\n%w", b.Name, errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type ChallengeRules struct {
|
||||
Difficulty int `json:"difficulty"`
|
||||
ReportAs int `json:"report_as"`
|
||||
Algorithm Algorithm `json:"algorithm"`
|
||||
}
|
||||
|
||||
var (
|
||||
ErrChallengeRuleHasWrongAlgorithm = errors.New("config.Bot.ChallengeRules: algorithm is invalid")
|
||||
ErrChallengeDifficultyTooLow = errors.New("config.Bot.ChallengeRules: difficulty is too low (must be >= 1)")
|
||||
ErrChallengeDifficultyTooHigh = errors.New("config.Bot.ChallengeRules: difficulty is too high (must be <= 64)")
|
||||
)
|
||||
|
||||
func (cr ChallengeRules) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if cr.Difficulty < 1 {
|
||||
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooLow, cr.Difficulty))
|
||||
}
|
||||
|
||||
if cr.Difficulty > 64 {
|
||||
errs = append(errs, fmt.Errorf("%w, got: %d", ErrChallengeDifficultyTooHigh, cr.Difficulty))
|
||||
}
|
||||
|
||||
switch cr.Algorithm {
|
||||
case AlgorithmFast, AlgorithmSlow, AlgorithmUnknown:
|
||||
// do nothing, it's all good
|
||||
default:
|
||||
errs = append(errs, fmt.Errorf("%w: %q", ErrChallengeRuleHasWrongAlgorithm, cr.Algorithm))
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config: challenge rules entry is not valid:\n%w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
Bots []Bot `json:"bots"`
|
||||
DNSBL bool `json:"dnsbl"`
|
||||
}
|
||||
|
||||
func (c Config) Valid() error {
|
||||
var errs []error
|
||||
|
||||
if len(c.Bots) == 0 {
|
||||
errs = append(errs, ErrNoBotRulesDefined)
|
||||
}
|
||||
|
||||
for _, b := range c.Bots {
|
||||
if err := b.Valid(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return fmt.Errorf("config is not valid:\n%w", errors.Join(errs...))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,27 +1,24 @@
|
||||
package config_test
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/TecharoHQ/anubis/data"
|
||||
. "github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
)
|
||||
|
||||
func p[V any](v V) *V { return &v }
|
||||
|
||||
func TestBotValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
err error
|
||||
name string
|
||||
bot BotConfig
|
||||
bot Bot
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "simple user agent",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
@@ -30,7 +27,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "simple path",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "well-known-path",
|
||||
Action: RuleAllow,
|
||||
PathRegex: p("^/.well-known/.*$"),
|
||||
@@ -39,7 +36,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "no rule name",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Action: RuleChallenge,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
},
|
||||
@@ -47,7 +44,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "no rule matcher",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "broken-rule",
|
||||
Action: RuleAllow,
|
||||
},
|
||||
@@ -55,7 +52,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "both user-agent and path",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "path-and-user-agent",
|
||||
Action: RuleDeny,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
@@ -65,7 +62,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "unknown action",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "Unknown action",
|
||||
Action: RuleUnknown,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
@@ -74,7 +71,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "invalid user agent regex",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
UserAgentRegex: p("a(b"),
|
||||
@@ -83,33 +80,21 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "invalid path regex",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
PathRegex: p("a(b"),
|
||||
},
|
||||
err: ErrInvalidPathRegex,
|
||||
},
|
||||
{
|
||||
name: "invalid headers regex",
|
||||
bot: BotConfig{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
HeadersRegex: map[string]string{
|
||||
"Content-Type": "a(b",
|
||||
},
|
||||
PathRegex: p("a(b"),
|
||||
},
|
||||
err: ErrInvalidHeadersRegex,
|
||||
},
|
||||
{
|
||||
name: "challenge difficulty too low",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
PathRegex: p("Mozilla"),
|
||||
Challenge: &ChallengeRules{
|
||||
Difficulty: -1,
|
||||
Difficulty: 0,
|
||||
ReportAs: 4,
|
||||
Algorithm: "fast",
|
||||
},
|
||||
@@ -118,7 +103,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "challenge difficulty too high",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
PathRegex: p("Mozilla"),
|
||||
@@ -130,9 +115,23 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
err: ErrChallengeDifficultyTooHigh,
|
||||
},
|
||||
{
|
||||
name: "challenge wrong algorithm",
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleChallenge,
|
||||
PathRegex: p("Mozilla"),
|
||||
Challenge: &ChallengeRules{
|
||||
Difficulty: 420,
|
||||
ReportAs: 4,
|
||||
Algorithm: "high quality rips",
|
||||
},
|
||||
},
|
||||
err: ErrChallengeRuleHasWrongAlgorithm,
|
||||
},
|
||||
{
|
||||
name: "invalid cidr range",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
RemoteAddr: []string{"0.0.0.0/33"},
|
||||
@@ -141,7 +140,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "only filter by IP range",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
RemoteAddr: []string{"0.0.0.0/0"},
|
||||
@@ -150,7 +149,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "filter by user agent and IP range",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
@@ -160,7 +159,7 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "filter by path and IP range",
|
||||
bot: BotConfig{
|
||||
bot: Bot{
|
||||
Name: "mozilla-ua",
|
||||
Action: RuleAllow,
|
||||
PathRegex: p("^.*$"),
|
||||
@@ -168,25 +167,6 @@ func TestBotValid(t *testing.T) {
|
||||
},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "weight rule without weight",
|
||||
bot: BotConfig{
|
||||
Name: "weight-adjust-if-mozilla",
|
||||
Action: RuleWeigh,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "weight rule with weight adjust",
|
||||
bot: BotConfig{
|
||||
Name: "weight-adjust-if-mozilla",
|
||||
Action: RuleWeigh,
|
||||
UserAgentRegex: p("Mozilla"),
|
||||
Weight: &Weight{
|
||||
Adjust: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, cs := range tests {
|
||||
@@ -226,73 +206,13 @@ func TestConfigValidKnownGood(t *testing.T) {
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
c, err := Load(fin, st.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
var c Config
|
||||
if err := json.NewDecoder(fin).Decode(&c); err != nil {
|
||||
t.Fatalf("can't decode file: %v", err)
|
||||
}
|
||||
|
||||
if err := c.Valid(); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(c.Bots) == 0 {
|
||||
t.Error("wanted more than 0 bots, got zero")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestImportStatement(t *testing.T) {
|
||||
type testCase struct {
|
||||
err error
|
||||
name string
|
||||
importPath string
|
||||
}
|
||||
|
||||
var tests []testCase
|
||||
|
||||
for _, folderName := range []string{
|
||||
"apps",
|
||||
"bots",
|
||||
"common",
|
||||
"crawlers",
|
||||
"meta",
|
||||
} {
|
||||
if err := fs.WalkDir(data.BotPolicies, folderName, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
if d.Name() == "README.md" {
|
||||
return nil
|
||||
}
|
||||
|
||||
tests = append(tests, testCase{
|
||||
name: "(data)/" + path,
|
||||
importPath: "(data)/" + path,
|
||||
err: nil,
|
||||
})
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
is := &ImportStatement{
|
||||
Import: tt.importPath,
|
||||
}
|
||||
|
||||
if err := is.Valid(); err != nil {
|
||||
t.Errorf("validation error: %v", err)
|
||||
}
|
||||
|
||||
if len(is.Bots) == 0 {
|
||||
t.Error("wanted bot definitions, but got none")
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -313,8 +233,12 @@ func TestConfigValidBad(t *testing.T) {
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
_, err = Load(fin, filepath.Join("testdata", "bad", st.Name()))
|
||||
if err == nil {
|
||||
var c Config
|
||||
if err := json.NewDecoder(fin).Decode(&c); err != nil {
|
||||
t.Fatalf("can't decode file: %v", err)
|
||||
}
|
||||
|
||||
if err := c.Valid(); err == nil {
|
||||
t.Fatal("validation should have failed but didn't somehow")
|
||||
} else {
|
||||
t.Log(err)
|
||||
@@ -322,49 +246,3 @@ func TestConfigValidBad(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBotConfigZero(t *testing.T) {
|
||||
var b BotConfig
|
||||
if !b.Zero() {
|
||||
t.Error("zero value config.BotConfig is not zero value")
|
||||
}
|
||||
|
||||
b.Name = "hi"
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with name is zero value")
|
||||
}
|
||||
|
||||
b.UserAgentRegex = p(".*")
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with user agent regex is zero value")
|
||||
}
|
||||
|
||||
b.PathRegex = p(".*")
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with path regex is zero value")
|
||||
}
|
||||
|
||||
b.HeadersRegex = map[string]string{"hi": "there"}
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with headers regex is zero value")
|
||||
}
|
||||
|
||||
b.Action = RuleAllow
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with action is zero value")
|
||||
}
|
||||
|
||||
b.RemoteAddr = []string{"::/0"}
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with remote addresses is zero value")
|
||||
}
|
||||
|
||||
b.Challenge = &ChallengeRules{
|
||||
Difficulty: 4,
|
||||
ReportAs: 4,
|
||||
Algorithm: DefaultAlgorithm,
|
||||
}
|
||||
if b.Zero() {
|
||||
t.Error("config.BotConfig with challenge rules is zero value")
|
||||
}
|
||||
}
|
||||
@@ -9,13 +9,6 @@
|
||||
"name": "user-agent-bad",
|
||||
"user_agent_regex": "a(b",
|
||||
"action": "DENY"
|
||||
},
|
||||
{
|
||||
"name": "headers-bad",
|
||||
"headers": {
|
||||
"Accept-Encoding": "a(b"
|
||||
},
|
||||
"action": "DENY"
|
||||
}
|
||||
]
|
||||
}
|
||||
89
cmd/anubis/js/main.mjs
Normal file
89
cmd/anubis/js/main.mjs
Normal file
@@ -0,0 +1,89 @@
|
||||
import processFast from "./proof-of-work.mjs";
|
||||
import processSlow from "./proof-of-work-slow.mjs";
|
||||
import { testVideo } from "./video.mjs";
|
||||
|
||||
const algorithms = {
|
||||
"fast": processFast,
|
||||
"slow": processSlow,
|
||||
}
|
||||
|
||||
// from Xeact
|
||||
const u = (url = "", params = {}) => {
|
||||
let result = new URL(url, window.location.href);
|
||||
Object.entries(params).forEach((kv) => {
|
||||
let [k, v] = kv;
|
||||
result.searchParams.set(k, v);
|
||||
});
|
||||
return result.toString();
|
||||
};
|
||||
|
||||
const imageURL = (mood, cacheBuster) =>
|
||||
u(`/.within.website/x/cmd/anubis/static/img/${mood}.webp`, { cacheBuster });
|
||||
|
||||
(async () => {
|
||||
const status = document.getElementById('status');
|
||||
const image = document.getElementById('image');
|
||||
const title = document.getElementById('title');
|
||||
const spinner = document.getElementById('spinner');
|
||||
const anubisVersion = JSON.parse(document.getElementById('anubis_version').textContent);
|
||||
|
||||
// const testarea = document.getElementById('testarea');
|
||||
|
||||
// const videoWorks = await testVideo(testarea);
|
||||
// console.log(`videoWorks: ${videoWorks}`);
|
||||
|
||||
// if (!videoWorks) {
|
||||
// title.innerHTML = "Oh no!";
|
||||
// status.innerHTML = "Checks failed. Please check your browser's settings and try again.";
|
||||
// image.src = imageURL("sad");
|
||||
// spinner.innerHTML = "";
|
||||
// spinner.style.display = "none";
|
||||
// return;
|
||||
// }
|
||||
|
||||
status.innerHTML = 'Calculating...';
|
||||
|
||||
const { challenge, rules } = await fetch("/.within.website/x/cmd/anubis/api/make-challenge", { method: "POST" })
|
||||
.then(r => {
|
||||
if (!r.ok) {
|
||||
throw new Error("Failed to fetch config");
|
||||
}
|
||||
return r.json();
|
||||
})
|
||||
.catch(err => {
|
||||
title.innerHTML = "Oh no!";
|
||||
status.innerHTML = `Failed to fetch config: ${err.message}`;
|
||||
image.src = imageURL("sad", anubisVersion);
|
||||
spinner.innerHTML = "";
|
||||
spinner.style.display = "none";
|
||||
throw err;
|
||||
});
|
||||
|
||||
const process = algorithms[rules.algorithm];
|
||||
if (!process) {
|
||||
title.innerHTML = "Oh no!";
|
||||
status.innerHTML = `Failed to resolve check algorithm. You may want to reload the page.`;
|
||||
image.src = imageURL("sad", anubisVersion);
|
||||
spinner.innerHTML = "";
|
||||
spinner.style.display = "none";
|
||||
return;
|
||||
}
|
||||
|
||||
status.innerHTML = `Calculating...<br/>Difficulty: ${rules.report_as}`;
|
||||
|
||||
const t0 = Date.now();
|
||||
const { hash, nonce } = await process(challenge, rules.difficulty);
|
||||
const t1 = Date.now();
|
||||
console.log({ hash, nonce });
|
||||
|
||||
title.innerHTML = "Success!";
|
||||
status.innerHTML = `Done! Took ${t1 - t0}ms, ${nonce} iterations`;
|
||||
image.src = imageURL("happy", anubisVersion);
|
||||
spinner.innerHTML = "";
|
||||
spinner.style.display = "none";
|
||||
|
||||
setTimeout(() => {
|
||||
const redir = window.location.href;
|
||||
window.location.href = u("/.within.website/x/cmd/anubis/api/pass-challenge", { response: hash, nonce, redir, elapsedTime: t1 - t0 });
|
||||
}, 250);
|
||||
})();
|
||||
63
cmd/anubis/js/proof-of-work-slow.mjs
Normal file
63
cmd/anubis/js/proof-of-work-slow.mjs
Normal file
@@ -0,0 +1,63 @@
|
||||
// https://dev.to/ratmd/simple-proof-of-work-in-javascript-3kgm
|
||||
|
||||
export default function process(data, difficulty = 5, _threads = 1) {
|
||||
console.debug("slow algo");
|
||||
return new Promise((resolve, reject) => {
|
||||
let webWorkerURL = URL.createObjectURL(new Blob([
|
||||
'(', processTask(), ')()'
|
||||
], { type: 'application/javascript' }));
|
||||
|
||||
let worker = new Worker(webWorkerURL);
|
||||
|
||||
worker.onmessage = (event) => {
|
||||
worker.terminate();
|
||||
resolve(event.data);
|
||||
};
|
||||
|
||||
worker.onerror = (event) => {
|
||||
worker.terminate();
|
||||
reject();
|
||||
};
|
||||
|
||||
worker.postMessage({
|
||||
data,
|
||||
difficulty
|
||||
});
|
||||
|
||||
URL.revokeObjectURL(webWorkerURL);
|
||||
});
|
||||
}
|
||||
|
||||
function processTask() {
|
||||
return function () {
|
||||
const sha256 = (text) => {
|
||||
const encoded = new TextEncoder().encode(text);
|
||||
return crypto.subtle.digest("SHA-256", encoded.buffer)
|
||||
.then((result) =>
|
||||
Array.from(new Uint8Array(result))
|
||||
.map((c) => c.toString(16).padStart(2, "0"))
|
||||
.join(""),
|
||||
);
|
||||
};
|
||||
|
||||
addEventListener('message', async (event) => {
|
||||
let data = event.data.data;
|
||||
let difficulty = event.data.difficulty;
|
||||
|
||||
let hash;
|
||||
let nonce = 0;
|
||||
do {
|
||||
hash = await sha256(data + nonce++);
|
||||
} while (hash.substring(0, difficulty) !== Array(difficulty + 1).join('0'));
|
||||
|
||||
nonce -= 1; // last nonce was post-incremented
|
||||
|
||||
postMessage({
|
||||
hash,
|
||||
data,
|
||||
difficulty,
|
||||
nonce,
|
||||
});
|
||||
});
|
||||
}.toString();
|
||||
}
|
||||
93
cmd/anubis/js/proof-of-work.mjs
Normal file
93
cmd/anubis/js/proof-of-work.mjs
Normal file
@@ -0,0 +1,93 @@
|
||||
export default function process(data, difficulty = 5, threads = (navigator.hardwareConcurrency || 1)) {
|
||||
console.debug("fast algo");
|
||||
return new Promise((resolve, reject) => {
|
||||
let webWorkerURL = URL.createObjectURL(new Blob([
|
||||
'(', processTask(), ')()'
|
||||
], { type: 'application/javascript' }));
|
||||
|
||||
const workers = [];
|
||||
|
||||
for (let i = 0; i < threads; i++) {
|
||||
let worker = new Worker(webWorkerURL);
|
||||
|
||||
worker.onmessage = (event) => {
|
||||
workers.forEach(worker => worker.terminate());
|
||||
worker.terminate();
|
||||
resolve(event.data);
|
||||
};
|
||||
|
||||
worker.onerror = (event) => {
|
||||
worker.terminate();
|
||||
reject();
|
||||
};
|
||||
|
||||
worker.postMessage({
|
||||
data,
|
||||
difficulty,
|
||||
nonce: i,
|
||||
threads,
|
||||
});
|
||||
|
||||
workers.push(worker);
|
||||
}
|
||||
|
||||
URL.revokeObjectURL(webWorkerURL);
|
||||
});
|
||||
}
|
||||
|
||||
function processTask() {
|
||||
return function () {
|
||||
const sha256 = (text) => {
|
||||
const encoded = new TextEncoder().encode(text);
|
||||
return crypto.subtle.digest("SHA-256", encoded.buffer);
|
||||
};
|
||||
|
||||
function uint8ArrayToHexString(arr) {
|
||||
return Array.from(arr)
|
||||
.map((c) => c.toString(16).padStart(2, "0"))
|
||||
.join("");
|
||||
}
|
||||
|
||||
addEventListener('message', async (event) => {
|
||||
let data = event.data.data;
|
||||
let difficulty = event.data.difficulty;
|
||||
let hash;
|
||||
let nonce = event.data.nonce;
|
||||
let threads = event.data.threads;
|
||||
|
||||
while (true) {
|
||||
const currentHash = await sha256(data + nonce);
|
||||
const thisHash = new Uint8Array(currentHash);
|
||||
let valid = true;
|
||||
|
||||
for (let j = 0; j < difficulty; j++) {
|
||||
const byteIndex = Math.floor(j / 2); // which byte we are looking at
|
||||
const nibbleIndex = j % 2; // which nibble in the byte we are looking at (0 is high, 1 is low)
|
||||
|
||||
let nibble = (thisHash[byteIndex] >> (nibbleIndex === 0 ? 4 : 0)) & 0x0F; // Get the nibble
|
||||
|
||||
if (nibble !== 0) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid) {
|
||||
hash = uint8ArrayToHexString(thisHash);
|
||||
console.log(hash);
|
||||
break;
|
||||
}
|
||||
|
||||
nonce += threads;
|
||||
}
|
||||
|
||||
postMessage({
|
||||
hash,
|
||||
data,
|
||||
difficulty,
|
||||
nonce,
|
||||
});
|
||||
});
|
||||
}.toString();
|
||||
}
|
||||
|
||||
16
cmd/anubis/js/video.mjs
Normal file
16
cmd/anubis/js/video.mjs
Normal file
@@ -0,0 +1,16 @@
|
||||
const videoElement = `<video id="videotest" width="0" height="0" src="/.within.website/x/cmd/anubis/static/testdata/black.mp4"></video>`;
|
||||
|
||||
export const testVideo = async (testarea) => {
|
||||
testarea.innerHTML = videoElement;
|
||||
return (await new Promise((resolve) => {
|
||||
const video = document.getElementById('videotest');
|
||||
video.oncanplay = () => {
|
||||
testarea.style.display = "none";
|
||||
resolve(true);
|
||||
};
|
||||
video.onerror = (ev) => {
|
||||
testarea.style.display = "none";
|
||||
resolve(false);
|
||||
};
|
||||
}));
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
212
cmd/anubis/policy.go
Normal file
212
cmd/anubis/policy.go
Normal file
@@ -0,0 +1,212 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"regexp"
|
||||
|
||||
"github.com/TecharoHQ/anubis/cmd/anubis/internal/config"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/yl2chen/cidranger"
|
||||
)
|
||||
|
||||
var (
|
||||
policyApplications = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "anubis_policy_results",
|
||||
Help: "The results of each policy rule",
|
||||
}, []string{"rule", "action"})
|
||||
)
|
||||
|
||||
type ParsedConfig struct {
|
||||
orig config.Config
|
||||
|
||||
Bots []Bot
|
||||
DNSBL bool
|
||||
}
|
||||
|
||||
type Bot struct {
|
||||
Name string
|
||||
UserAgent *regexp.Regexp
|
||||
Path *regexp.Regexp
|
||||
Action config.Rule `json:"action"`
|
||||
Challenge *config.ChallengeRules
|
||||
Ranger cidranger.Ranger
|
||||
}
|
||||
|
||||
func (b Bot) Hash() (string, error) {
|
||||
var pathRex string
|
||||
if b.Path != nil {
|
||||
pathRex = b.Path.String()
|
||||
}
|
||||
var userAgentRex string
|
||||
if b.UserAgent != nil {
|
||||
userAgentRex = b.UserAgent.String()
|
||||
}
|
||||
|
||||
return sha256sum(fmt.Sprintf("%s::%s::%s", b.Name, pathRex, userAgentRex)), nil
|
||||
}
|
||||
|
||||
func parseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) {
|
||||
var c config.Config
|
||||
if err := json.NewDecoder(fin).Decode(&c); err != nil {
|
||||
return nil, fmt.Errorf("can't parse policy config JSON %s: %w", fname, err)
|
||||
}
|
||||
|
||||
if err := c.Valid(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
result := &ParsedConfig{
|
||||
orig: c,
|
||||
}
|
||||
|
||||
for _, b := range c.Bots {
|
||||
if berr := b.Valid(); berr != nil {
|
||||
err = errors.Join(err, berr)
|
||||
continue
|
||||
}
|
||||
|
||||
var botParseErr error
|
||||
parsedBot := Bot{
|
||||
Name: b.Name,
|
||||
Action: b.Action,
|
||||
}
|
||||
|
||||
if b.RemoteAddr != nil && len(b.RemoteAddr) > 0 {
|
||||
parsedBot.Ranger = cidranger.NewPCTrieRanger()
|
||||
|
||||
for _, cidr := range b.RemoteAddr {
|
||||
_, rng, err := net.ParseCIDR(cidr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("[unexpected] range %s not parsing: %w", cidr, err)
|
||||
}
|
||||
|
||||
parsedBot.Ranger.Insert(cidranger.NewBasicRangerEntry(*rng))
|
||||
}
|
||||
}
|
||||
|
||||
if b.UserAgentRegex != nil {
|
||||
userAgent, err := regexp.Compile(*b.UserAgentRegex)
|
||||
if err != nil {
|
||||
botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling user agent regexp: %w", err))
|
||||
continue
|
||||
} else {
|
||||
parsedBot.UserAgent = userAgent
|
||||
}
|
||||
}
|
||||
|
||||
if b.PathRegex != nil {
|
||||
path, err := regexp.Compile(*b.PathRegex)
|
||||
if err != nil {
|
||||
botParseErr = errors.Join(botParseErr, fmt.Errorf("while compiling path regexp: %w", err))
|
||||
continue
|
||||
} else {
|
||||
parsedBot.Path = path
|
||||
}
|
||||
}
|
||||
|
||||
if b.Challenge == nil {
|
||||
parsedBot.Challenge = &config.ChallengeRules{
|
||||
Difficulty: defaultDifficulty,
|
||||
ReportAs: defaultDifficulty,
|
||||
Algorithm: config.AlgorithmFast,
|
||||
}
|
||||
} else {
|
||||
parsedBot.Challenge = b.Challenge
|
||||
if parsedBot.Challenge.Algorithm == config.AlgorithmUnknown {
|
||||
parsedBot.Challenge.Algorithm = config.AlgorithmFast
|
||||
}
|
||||
}
|
||||
|
||||
result.Bots = append(result.Bots, parsedBot)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, err)
|
||||
}
|
||||
|
||||
result.DNSBL = c.DNSBL
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type CheckResult struct {
|
||||
Name string
|
||||
Rule config.Rule
|
||||
}
|
||||
|
||||
func (cr CheckResult) LogValue() slog.Value {
|
||||
return slog.GroupValue(
|
||||
slog.String("name", cr.Name),
|
||||
slog.String("rule", string(cr.Rule)))
|
||||
}
|
||||
|
||||
func cr(name string, rule config.Rule) CheckResult {
|
||||
return CheckResult{
|
||||
Name: name,
|
||||
Rule: rule,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) checkRemoteAddress(b Bot, addr net.IP) bool {
|
||||
if b.Ranger == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
ok, err := b.Ranger.Contains(addr)
|
||||
if err != nil {
|
||||
log.Panicf("[unexpected] something very funky is going on, %q does not have a calculable network number: %v", addr.String(), err)
|
||||
}
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// Check evaluates the list of rules, and returns the result
|
||||
func (s *Server) check(r *http.Request) (CheckResult, *Bot, error) {
|
||||
host := r.Header.Get("X-Real-Ip")
|
||||
if host == "" {
|
||||
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] X-Real-Ip header is not set")
|
||||
}
|
||||
|
||||
addr := net.ParseIP(host)
|
||||
if addr == nil {
|
||||
return zilch[CheckResult](), nil, fmt.Errorf("[misconfiguration] %q is not an IP address", host)
|
||||
}
|
||||
|
||||
for _, b := range s.policy.Bots {
|
||||
if b.UserAgent != nil {
|
||||
if uaMatch := b.UserAgent.MatchString(r.UserAgent()); uaMatch || (uaMatch && s.checkRemoteAddress(b, addr)) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
|
||||
if b.Path != nil {
|
||||
if pathMatch := b.Path.MatchString(r.URL.Path); pathMatch || (pathMatch && s.checkRemoteAddress(b, addr)) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
|
||||
if b.Ranger != nil {
|
||||
if s.checkRemoteAddress(b, addr) {
|
||||
return cr("bot/"+b.Name, b.Action), &b, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return cr("default/allow", config.RuleAllow), &Bot{
|
||||
Challenge: &config.ChallengeRules{
|
||||
Difficulty: defaultDifficulty,
|
||||
ReportAs: defaultDifficulty,
|
||||
Algorithm: config.AlgorithmFast,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
65
cmd/anubis/policy_test.go
Normal file
65
cmd/anubis/policy_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDefaultPolicyMustParse(t *testing.T) {
|
||||
fin, err := static.Open("botPolicies.json")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
if _, err := parseConfig(fin, "botPolicies.json", defaultDifficulty); err != nil {
|
||||
t.Fatalf("can't parse config: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGoodConfigs(t *testing.T) {
|
||||
finfos, err := os.ReadDir("internal/config/testdata/good")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, st := range finfos {
|
||||
st := st
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
fin, err := os.Open(filepath.Join("internal", "config", "testdata", "good", st.Name()))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
if _, err := parseConfig(fin, fin.Name(), defaultDifficulty); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBadConfigs(t *testing.T) {
|
||||
finfos, err := os.ReadDir("internal/config/testdata/bad")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, st := range finfos {
|
||||
st := st
|
||||
t.Run(st.Name(), func(t *testing.T) {
|
||||
fin, err := os.Open(filepath.Join("internal", "config", "testdata", "bad", st.Name()))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fin.Close()
|
||||
|
||||
if _, err := parseConfig(fin, fin.Name(), defaultDifficulty); err == nil {
|
||||
t.Fatal(err)
|
||||
} else {
|
||||
t.Log(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
BIN
cmd/anubis/static/img/happy.webp
Normal file
BIN
cmd/anubis/static/img/happy.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 58 KiB |
BIN
cmd/anubis/static/img/pensive.webp
Normal file
BIN
cmd/anubis/static/img/pensive.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 48 KiB |
BIN
cmd/anubis/static/img/sad.webp
Normal file
BIN
cmd/anubis/static/img/sad.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 50 KiB |
2
cmd/anubis/static/js/main.mjs
Normal file
2
cmd/anubis/static/js/main.mjs
Normal file
@@ -0,0 +1,2 @@
|
||||
(()=>{function p(r,n=5,t=navigator.hardwareConcurrency||1){return console.debug("fast algo"),new Promise((e,o)=>{let s=URL.createObjectURL(new Blob(["(",y(),")()"],{type:"application/javascript"})),a=[];for(let i=0;i<t;i++){let c=new Worker(s);c.onmessage=d=>{a.forEach(u=>u.terminate()),c.terminate(),e(d.data)},c.onerror=d=>{c.terminate(),o()},c.postMessage({data:r,difficulty:n,nonce:i,threads:t}),a.push(c)}URL.revokeObjectURL(s)})}function y(){return function(){let r=t=>{let e=new TextEncoder().encode(t);return crypto.subtle.digest("SHA-256",e.buffer)};function n(t){return Array.from(t).map(e=>e.toString(16).padStart(2,"0")).join("")}addEventListener("message",async t=>{let e=t.data.data,o=t.data.difficulty,s,a=t.data.nonce,i=t.data.threads;for(;;){let c=await r(e+a),d=new Uint8Array(c),u=!0;for(let m=0;m<o;m++){let l=Math.floor(m/2),g=m%2;if((d[l]>>(g===0?4:0)&15)!==0){u=!1;break}}if(u){s=n(d),console.log(s);break}a+=i}postMessage({hash:s,data:e,difficulty:o,nonce:a})})}.toString()}function f(r,n=5,t=1){return console.debug("slow algo"),new Promise((e,o)=>{let s=URL.createObjectURL(new Blob(["(",b(),")()"],{type:"application/javascript"})),a=new Worker(s);a.onmessage=i=>{a.terminate(),e(i.data)},a.onerror=i=>{a.terminate(),o()},a.postMessage({data:r,difficulty:n}),URL.revokeObjectURL(s)})}function b(){return function(){let r=n=>{let t=new TextEncoder().encode(n);return crypto.subtle.digest("SHA-256",t.buffer).then(e=>Array.from(new Uint8Array(e)).map(o=>o.toString(16).padStart(2,"0")).join(""))};addEventListener("message",async n=>{let t=n.data.data,e=n.data.difficulty,o,s=0;do o=await r(t+s++);while(o.substring(0,e)!==Array(e+1).join("0"));s-=1,postMessage({hash:o,data:t,difficulty:e,nonce:s})})}.toString()}var L={fast:p,slow:f},w=(r="",n={})=>{let t=new URL(r,window.location.href);return Object.entries(n).forEach(e=>{let[o,s]=e;t.searchParams.set(o,s)}),t.toString()},h=(r,n)=>w(`/.within.website/x/cmd/anubis/static/img/${r}.webp`,{cacheBuster:n});(async()=>{let r=document.getElementById("status"),n=document.getElementById("image"),t=document.getElementById("title"),e=document.getElementById("spinner"),o=JSON.parse(document.getElementById("anubis_version").textContent);r.innerHTML="Calculating...";let{challenge:s,rules:a}=await fetch("/.within.website/x/cmd/anubis/api/make-challenge",{method:"POST"}).then(l=>{if(!l.ok)throw new Error("Failed to fetch config");return l.json()}).catch(l=>{throw t.innerHTML="Oh no!",r.innerHTML=`Failed to fetch config: ${l.message}`,n.src=h("sad",o),e.innerHTML="",e.style.display="none",l}),i=L[a.algorithm];if(!i){t.innerHTML="Oh no!",r.innerHTML="Failed to resolve check algorithm. You may want to reload the page.",n.src=h("sad",o),e.innerHTML="",e.style.display="none";return}r.innerHTML=`Calculating...<br/>Difficulty: ${a.report_as}`;let c=Date.now(),{hash:d,nonce:u}=await i(s,a.difficulty),m=Date.now();console.log({hash:d,nonce:u}),t.innerHTML="Success!",r.innerHTML=`Done! Took ${m-c}ms, ${u} iterations`,n.src=h("happy",o),e.innerHTML="",e.style.display="none",setTimeout(()=>{let l=window.location.href;window.location.href=w("/.within.website/x/cmd/anubis/api/pass-challenge",{response:d,nonce:u,redir:l,elapsedTime:m-c})},250)})();})();
|
||||
//# sourceMappingURL=main.mjs.map
|
||||
BIN
cmd/anubis/static/js/main.mjs.br
Normal file
BIN
cmd/anubis/static/js/main.mjs.br
Normal file
Binary file not shown.
BIN
cmd/anubis/static/js/main.mjs.gz
Normal file
BIN
cmd/anubis/static/js/main.mjs.gz
Normal file
Binary file not shown.
7
cmd/anubis/static/js/main.mjs.map
Normal file
7
cmd/anubis/static/js/main.mjs.map
Normal file
File diff suppressed because one or more lines are too long
BIN
cmd/anubis/static/js/main.mjs.zst
Normal file
BIN
cmd/anubis/static/js/main.mjs.zst
Normal file
Binary file not shown.
47
cmd/anubis/static/robots.txt
Normal file
47
cmd/anubis/static/robots.txt
Normal file
@@ -0,0 +1,47 @@
|
||||
User-agent: AI2Bot
|
||||
User-agent: Ai2Bot-Dolma
|
||||
User-agent: Amazonbot
|
||||
User-agent: anthropic-ai
|
||||
User-agent: Applebot
|
||||
User-agent: Applebot-Extended
|
||||
User-agent: Bytespider
|
||||
User-agent: CCBot
|
||||
User-agent: ChatGPT-User
|
||||
User-agent: Claude-Web
|
||||
User-agent: ClaudeBot
|
||||
User-agent: cohere-ai
|
||||
User-agent: cohere-training-data-crawler
|
||||
User-agent: Diffbot
|
||||
User-agent: DuckAssistBot
|
||||
User-agent: FacebookBot
|
||||
User-agent: FriendlyCrawler
|
||||
User-agent: Google-Extended
|
||||
User-agent: GoogleOther
|
||||
User-agent: GoogleOther-Image
|
||||
User-agent: GoogleOther-Video
|
||||
User-agent: GPTBot
|
||||
User-agent: iaskspider/2.0
|
||||
User-agent: ICC-Crawler
|
||||
User-agent: ImagesiftBot
|
||||
User-agent: img2dataset
|
||||
User-agent: ISSCyberRiskCrawler
|
||||
User-agent: Kangaroo Bot
|
||||
User-agent: Meta-ExternalAgent
|
||||
User-agent: Meta-ExternalFetcher
|
||||
User-agent: OAI-SearchBot
|
||||
User-agent: omgili
|
||||
User-agent: omgilibot
|
||||
User-agent: PanguBot
|
||||
User-agent: PerplexityBot
|
||||
User-agent: PetalBot
|
||||
User-agent: Scrapy
|
||||
User-agent: SemrushBot
|
||||
User-agent: Sidetrade indexer bot
|
||||
User-agent: Timpibot
|
||||
User-agent: VelenPublicWebCrawler
|
||||
User-agent: Webzio-Extended
|
||||
User-agent: YouBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: *
|
||||
Disallow: /
|
||||
@@ -19,6 +19,7 @@ var (
|
||||
dockerLabels = flag.String("docker-labels", os.Getenv("DOCKER_METADATA_OUTPUT_LABELS"), "Docker image labels")
|
||||
dockerRepo = flag.String("docker-repo", "registry.int.xeserv.us/techaro/anubis", "Docker image repository for Anubis")
|
||||
dockerTags = flag.String("docker-tags", os.Getenv("DOCKER_METADATA_OUTPUT_TAGS"), "newline separated docker tags including the registry name")
|
||||
githubActor = flag.String("github-actor", "", "GitHub actor")
|
||||
githubEventName = flag.String("github-event-name", "", "GitHub event name")
|
||||
pullRequestID = flag.Int("pull-request-id", -1, "GitHub pull request ID")
|
||||
slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)")
|
||||
@@ -30,7 +31,7 @@ func main() {
|
||||
|
||||
internal.InitSlog(*slogLevel)
|
||||
|
||||
koDockerRepo := strings.TrimSuffix(*dockerRepo, "/"+filepath.Base(*dockerRepo))
|
||||
koDockerRepo := strings.TrimRight(*dockerRepo, "/"+filepath.Base(*dockerRepo))
|
||||
|
||||
if *githubEventName == "pull_request" && *pullRequestID != -1 {
|
||||
*dockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis", *pullRequestID)
|
||||
@@ -46,11 +47,6 @@ func main() {
|
||||
)
|
||||
}
|
||||
|
||||
if strings.Contains(*dockerTags, ",") {
|
||||
newTags := strings.Join(strings.Split(*dockerTags, ","), "\n")
|
||||
dockerTags = &newTags
|
||||
}
|
||||
|
||||
setOutput("docker_image", strings.SplitN(*dockerTags, "\n", 2)[0])
|
||||
|
||||
version, err := run("git describe --tags --always --dirty")
|
||||
@@ -117,6 +113,11 @@ type image struct {
|
||||
tag string
|
||||
}
|
||||
|
||||
func newlineSep2Comma(inp string) string {
|
||||
lines := strings.Split(inp, "\n")
|
||||
return strings.Join(lines, ",")
|
||||
}
|
||||
|
||||
func parseImageList(imageList string) ([]image, error) {
|
||||
images := strings.Split(imageList, "\n")
|
||||
var result []image
|
||||
@@ -128,15 +129,15 @@ func parseImageList(imageList string) ([]image, error) {
|
||||
// reg.xeiaso.net/techaro/anubis:latest
|
||||
// repository: reg.xeiaso.net/techaro/anubis
|
||||
// tag: latest
|
||||
index := strings.LastIndex(img, ":")
|
||||
parts := strings.SplitN(img, ":", 2)
|
||||
result = append(result, image{
|
||||
repository: img[:index],
|
||||
tag: img[index+1:],
|
||||
repository: parts[0],
|
||||
tag: parts[1],
|
||||
})
|
||||
}
|
||||
|
||||
if len(result) == 0 {
|
||||
return nil, fmt.Errorf("no images provided, bad flags")
|
||||
return nil, fmt.Errorf("no images provided, bad flags??")
|
||||
}
|
||||
|
||||
return result, nil
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
|
||||
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
|
||||
*/
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
|
||||
fmt.Println("Example: go run batch_process.go ./cleaned")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
cleanedDir := os.Args[1]
|
||||
outputDir := "generated_policies"
|
||||
|
||||
// Create output directory
|
||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||
log.Fatalf("Failed to create output directory: %v", err)
|
||||
}
|
||||
|
||||
count := 0
|
||||
err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generate policy name from file path
|
||||
relPath, _ := filepath.Rel(cleanedDir, path)
|
||||
policyName := strings.ReplaceAll(relPath, "/", "-")
|
||||
policyName = strings.TrimSuffix(policyName, "-robots.txt")
|
||||
policyName = strings.ReplaceAll(policyName, ".", "-")
|
||||
|
||||
outputFile := filepath.Join(outputDir, policyName+".yaml")
|
||||
|
||||
cmd := exec.Command("go", "run", "main.go",
|
||||
"-input", path,
|
||||
"-output", outputFile,
|
||||
"-name", policyName,
|
||||
"-format", "yaml")
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
|
||||
return nil // Continue processing other files
|
||||
}
|
||||
|
||||
count++
|
||||
if count%100 == 0 {
|
||||
fmt.Printf("Processed %d files...\n", count)
|
||||
} else if count%10 == 0 {
|
||||
fmt.Print(".")
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Error walking directory: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Successfully processed %d robots.txt files\n", count)
|
||||
fmt.Printf("Generated policies saved to: %s/\n", outputDir)
|
||||
}
|
||||
@@ -1,384 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
var (
|
||||
inputFile = flag.String("input", "", "path to robots.txt file (use - for stdin)")
|
||||
outputFile = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
|
||||
outputFormat = flag.String("format", "yaml", "output format: yaml or json")
|
||||
baseAction = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
|
||||
crawlDelay = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
|
||||
policyName = flag.String("name", "robots-txt-policy", "name for the generated policy")
|
||||
userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
|
||||
helpFlag = flag.Bool("help", false, "show help")
|
||||
)
|
||||
|
||||
type RobotsRule struct {
|
||||
UserAgents []string
|
||||
Disallows []string
|
||||
Allows []string
|
||||
CrawlDelay int
|
||||
IsBlacklist bool // true if this is a specifically denied user agent
|
||||
}
|
||||
|
||||
type AnubisRule struct {
|
||||
Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
|
||||
Challenge *config.ChallengeRules `yaml:"challenge,omitempty" json:"challenge,omitempty"`
|
||||
Weight *config.Weight `yaml:"weight,omitempty" json:"weight,omitempty"`
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Action string `yaml:"action" json:"action"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
flag.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "%s [options] -input <robots.txt>\n\n", os.Args[0])
|
||||
flag.PrintDefaults()
|
||||
fmt.Fprintln(os.Stderr, "\nExamples:")
|
||||
fmt.Fprintln(os.Stderr, " # Convert local robots.txt file")
|
||||
fmt.Fprintln(os.Stderr, " robots2policy -input robots.txt -output policy.yaml")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " # Convert from URL")
|
||||
fmt.Fprintln(os.Stderr, " robots2policy -input https://example.com/robots.txt -format json")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
fmt.Fprintln(os.Stderr, " # Read from stdin, write to stdout")
|
||||
fmt.Fprintln(os.Stderr, " curl https://example.com/robots.txt | robots2policy -input -")
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
|
||||
flag.Usage()
|
||||
}
|
||||
|
||||
// Read robots.txt
|
||||
var input io.Reader
|
||||
if *inputFile == "-" {
|
||||
input = os.Stdin
|
||||
} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
|
||||
resp, err := http.Get(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to fetch robots.txt from URL: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
input = resp.Body
|
||||
} else {
|
||||
file, err := os.Open(*inputFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open input file: %v", err)
|
||||
}
|
||||
defer file.Close()
|
||||
input = file
|
||||
}
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(input)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check if any rules were generated
|
||||
if len(anubisRules) == 0 {
|
||||
log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
|
||||
}
|
||||
|
||||
// Generate output
|
||||
var output []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
output, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
output, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
default:
|
||||
log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Write output
|
||||
if *outputFile == "" || *outputFile == "-" {
|
||||
fmt.Print(string(output))
|
||||
} else {
|
||||
err = os.WriteFile(*outputFile, output, 0644)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to write output file: %v", err)
|
||||
}
|
||||
fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
|
||||
}
|
||||
}
|
||||
|
||||
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
|
||||
rule := RobotsRule{
|
||||
UserAgents: make([]string, len(userAgents)),
|
||||
Disallows: make([]string, len(disallows)),
|
||||
Allows: make([]string, len(allows)),
|
||||
CrawlDelay: crawlDelay,
|
||||
}
|
||||
copy(rule.UserAgents, userAgents)
|
||||
copy(rule.Disallows, disallows)
|
||||
copy(rule.Allows, allows)
|
||||
return rule
|
||||
}
|
||||
|
||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
scanner := bufio.NewScanner(input)
|
||||
var rules []RobotsRule
|
||||
var currentUserAgents []string
|
||||
var currentDisallows []string
|
||||
var currentAllows []string
|
||||
var currentCrawlDelay int
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Skip empty lines and comments
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Split on first colon
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
directive := strings.TrimSpace(strings.ToLower(parts[0]))
|
||||
value := strings.TrimSpace(parts[1])
|
||||
|
||||
switch directive {
|
||||
case "user-agent":
|
||||
// If we have accumulated rules with directives and encounter a new user-agent,
|
||||
// flush the current rules
|
||||
if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
// Reset for next group
|
||||
currentUserAgents = nil
|
||||
currentDisallows = nil
|
||||
currentAllows = nil
|
||||
currentCrawlDelay = 0
|
||||
}
|
||||
currentUserAgents = append(currentUserAgents, value)
|
||||
|
||||
case "disallow":
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentDisallows = append(currentDisallows, value)
|
||||
}
|
||||
|
||||
case "allow":
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentAllows = append(currentAllows, value)
|
||||
}
|
||||
|
||||
case "crawl-delay":
|
||||
if len(currentUserAgents) > 0 {
|
||||
if delay, err := parseIntSafe(value); err == nil {
|
||||
currentCrawlDelay = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last group of rules
|
||||
if len(currentUserAgents) > 0 {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
}
|
||||
|
||||
// Mark blacklisted user agents (those with "Disallow: /")
|
||||
for i := range rules {
|
||||
for _, disallow := range rules[i].Disallows {
|
||||
if disallow == "/" {
|
||||
rules[i].IsBlacklist = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rules, scanner.Err()
|
||||
}
|
||||
|
||||
func parseIntSafe(s string) (int, error) {
|
||||
var result int
|
||||
_, err := fmt.Sscanf(s, "%d", &result)
|
||||
return result, err
|
||||
}
|
||||
|
||||
func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
ruleCounter := 0
|
||||
|
||||
// Process each robots rule individually
|
||||
for _, robotsRule := range robotsRules {
|
||||
userAgents := robotsRule.UserAgents
|
||||
|
||||
// Handle crawl delay
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
|
||||
Action: "WEIGH",
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
}
|
||||
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else if len(userAgents) == 1 {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
Any: expressions,
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
||||
if len(userAgents) == 1 {
|
||||
userAgent := userAgents[0]
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
Any: expressions,
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
for _, disallow := range robotsRule.Disallows {
|
||||
if disallow == "/" {
|
||||
continue // Already handled as blacklist above
|
||||
}
|
||||
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
}
|
||||
|
||||
// Build CEL expression
|
||||
var conditions []string
|
||||
|
||||
// Add user agent conditions
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
// Wildcard user agent - no user agent condition needed
|
||||
} else if len(userAgents) == 1 {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
|
||||
} else {
|
||||
// For multiple user agents, we need to use a more complex expression
|
||||
// This is a limitation - we can't easily combine any for user agents with all for path
|
||||
// So we'll create separate rules for each user agent
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
continue // Skip wildcard as it's handled separately
|
||||
}
|
||||
ruleCounter++
|
||||
subRule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
Expression: &config.ExpressionOrList{
|
||||
All: []string{
|
||||
fmt.Sprintf("userAgent.contains(%q)", ua),
|
||||
buildPathCondition(disallow),
|
||||
},
|
||||
},
|
||||
}
|
||||
anubisRules = append(anubisRules, subRule)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Add path condition
|
||||
pathCondition := buildPathCondition(disallow)
|
||||
conditions = append(conditions, pathCondition)
|
||||
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: conditions,
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
}
|
||||
|
||||
func buildPathCondition(robotsPath string) string {
|
||||
// Handle wildcards in robots.txt paths
|
||||
if strings.Contains(robotsPath, "*") || strings.Contains(robotsPath, "?") {
|
||||
// Convert robots.txt wildcards to regex
|
||||
regex := regexp.QuoteMeta(robotsPath)
|
||||
regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
|
||||
regex = strings.ReplaceAll(regex, `\?`, `.`) // ? becomes .
|
||||
regex = "^" + regex
|
||||
return fmt.Sprintf("path.matches(%q)", regex)
|
||||
}
|
||||
|
||||
// Simple prefix match for most cases
|
||||
return fmt.Sprintf("path.startsWith(%q)", robotsPath)
|
||||
}
|
||||
@@ -1,424 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
type TestCase struct {
|
||||
name string
|
||||
robotsFile string
|
||||
expectedFile string
|
||||
options TestOptions
|
||||
}
|
||||
|
||||
type TestOptions struct {
|
||||
format string
|
||||
action string
|
||||
crawlDelayWeight int
|
||||
policyName string
|
||||
deniedAction string
|
||||
}
|
||||
|
||||
func TestDataFileConversion(t *testing.T) {
|
||||
|
||||
testCases := []TestCase{
|
||||
{
|
||||
name: "simple_default",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "simple_json",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "simple.json",
|
||||
options: TestOptions{format: "json"},
|
||||
},
|
||||
{
|
||||
name: "simple_deny_action",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "deny-action.yaml",
|
||||
options: TestOptions{format: "yaml", action: "DENY"},
|
||||
},
|
||||
{
|
||||
name: "simple_custom_name",
|
||||
robotsFile: "simple.robots.txt",
|
||||
expectedFile: "custom-name.yaml",
|
||||
options: TestOptions{format: "yaml", policyName: "my-custom-policy"},
|
||||
},
|
||||
{
|
||||
name: "blacklist_with_crawl_delay",
|
||||
robotsFile: "blacklist.robots.txt",
|
||||
expectedFile: "blacklist.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
{
|
||||
name: "wildcards",
|
||||
robotsFile: "wildcards.robots.txt",
|
||||
expectedFile: "wildcards.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "empty_file",
|
||||
robotsFile: "empty.robots.txt",
|
||||
expectedFile: "empty.yaml",
|
||||
options: TestOptions{format: "yaml"},
|
||||
},
|
||||
{
|
||||
name: "complex_scenario",
|
||||
robotsFile: "complex.robots.txt",
|
||||
expectedFile: "complex.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||
},
|
||||
{
|
||||
name: "consecutive_user_agents",
|
||||
robotsFile: "consecutive.robots.txt",
|
||||
expectedFile: "consecutive.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
robotsPath := filepath.Join("testdata", tc.robotsFile)
|
||||
expectedPath := filepath.Join("testdata", tc.expectedFile)
|
||||
|
||||
// Read robots.txt input
|
||||
robotsFile, err := os.Open(robotsPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
|
||||
}
|
||||
defer robotsFile.Close()
|
||||
|
||||
// Parse robots.txt
|
||||
rules, err := parseRobotsTxt(robotsFile)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
// Set test options
|
||||
oldFormat := *outputFormat
|
||||
oldAction := *baseAction
|
||||
oldCrawlDelay := *crawlDelay
|
||||
oldPolicyName := *policyName
|
||||
oldDeniedAction := *userAgentDeny
|
||||
|
||||
if tc.options.format != "" {
|
||||
*outputFormat = tc.options.format
|
||||
}
|
||||
if tc.options.action != "" {
|
||||
*baseAction = tc.options.action
|
||||
}
|
||||
if tc.options.crawlDelayWeight > 0 {
|
||||
*crawlDelay = tc.options.crawlDelayWeight
|
||||
}
|
||||
if tc.options.policyName != "" {
|
||||
*policyName = tc.options.policyName
|
||||
}
|
||||
if tc.options.deniedAction != "" {
|
||||
*userAgentDeny = tc.options.deniedAction
|
||||
}
|
||||
|
||||
// Restore options after test
|
||||
defer func() {
|
||||
*outputFormat = oldFormat
|
||||
*baseAction = oldAction
|
||||
*crawlDelay = oldCrawlDelay
|
||||
*policyName = oldPolicyName
|
||||
*userAgentDeny = oldDeniedAction
|
||||
}()
|
||||
|
||||
// Convert to Anubis rules
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Generate output
|
||||
var actualOutput []byte
|
||||
switch strings.ToLower(*outputFormat) {
|
||||
case "yaml":
|
||||
actualOutput, err = yaml.Marshal(anubisRules)
|
||||
case "json":
|
||||
actualOutput, err = json.MarshalIndent(anubisRules, "", " ")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal output: %v", err)
|
||||
}
|
||||
|
||||
// Read expected output
|
||||
expectedOutput, err := os.ReadFile(expectedPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
|
||||
}
|
||||
|
||||
if strings.ToLower(*outputFormat) == "yaml" {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = yaml.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual output: %v", err)
|
||||
}
|
||||
|
||||
err = yaml.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
} else {
|
||||
var actualData []interface{}
|
||||
var expectedData []interface{}
|
||||
|
||||
err = json.Unmarshal(actualOutput, &actualData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
|
||||
}
|
||||
|
||||
err = json.Unmarshal(expectedOutput, &expectedData)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
|
||||
}
|
||||
|
||||
// Compare data structures
|
||||
if !compareData(actualData, expectedData) {
|
||||
actualStr := strings.TrimSpace(string(actualOutput))
|
||||
expectedStr := strings.TrimSpace(string(expectedOutput))
|
||||
t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCaseInsensitiveParsing(t *testing.T) {
|
||||
robotsTxt := `User-Agent: *
|
||||
Disallow: /admin
|
||||
Crawl-Delay: 10
|
||||
|
||||
User-agent: TestBot
|
||||
disallow: /test
|
||||
crawl-delay: 5
|
||||
|
||||
USER-AGENT: UpperBot
|
||||
DISALLOW: /upper
|
||||
CRAWL-DELAY: 20`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
|
||||
}
|
||||
|
||||
expectedRules := 3
|
||||
if len(rules) != expectedRules {
|
||||
t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
|
||||
}
|
||||
|
||||
// Check that all crawl delays were parsed
|
||||
for i, rule := range rules {
|
||||
expectedDelays := []int{10, 5, 20}
|
||||
if rule.CrawlDelay != expectedDelays[i] {
|
||||
t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestVariousOutputFormats(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldPolicyName := *policyName
|
||||
*policyName = "test-policy"
|
||||
defer func() { *policyName = oldPolicyName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Test YAML output
|
||||
yamlOutput, err := yaml.Marshal(anubisRules)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal YAML: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
|
||||
t.Errorf("YAML output doesn't contain expected rule name")
|
||||
}
|
||||
|
||||
// Test JSON output
|
||||
jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal JSON: %v", err)
|
||||
}
|
||||
|
||||
if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
|
||||
t.Errorf("JSON output doesn't contain expected rule name")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDifferentActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin`
|
||||
|
||||
testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("action_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *baseAction
|
||||
*baseAction = action
|
||||
defer func() { *baseAction = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
if len(anubisRules) != 1 {
|
||||
t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
|
||||
}
|
||||
|
||||
if anubisRules[0].Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPolicyNaming(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Disallow: /private
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /`
|
||||
|
||||
testNames := []string{"custom-policy", "my-rules", "site-protection"}
|
||||
|
||||
for _, name := range testNames {
|
||||
t.Run("name_"+name, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldName := *policyName
|
||||
*policyName = name
|
||||
defer func() { *policyName = oldName }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Check that all rule names use the custom prefix
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.HasPrefix(rule.Name, name+"-") {
|
||||
t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCrawlDelayWeights(t *testing.T) {
|
||||
robotsTxt := `User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: SlowBot
|
||||
Disallow: /slow
|
||||
Crawl-delay: 60`
|
||||
|
||||
testWeights := []int{1, 5, 10, 25}
|
||||
|
||||
for _, weight := range testWeights {
|
||||
t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldWeight := *crawlDelay
|
||||
*crawlDelay = weight
|
||||
defer func() { *crawlDelay = oldWeight }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// Count weight rules and verify they have correct weight
|
||||
weightRules := 0
|
||||
for _, rule := range anubisRules {
|
||||
if rule.Action == "WEIGH" && rule.Weight != nil {
|
||||
weightRules++
|
||||
if rule.Weight.Adjust != weight {
|
||||
t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expectedWeightRules := 2 // One for *, one for SlowBot
|
||||
if weightRules != expectedWeightRules {
|
||||
t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacklistActions(t *testing.T) {
|
||||
robotsTxt := `User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /`
|
||||
|
||||
testActions := []string{"DENY", "CHALLENGE"}
|
||||
|
||||
for _, action := range testActions {
|
||||
t.Run("blacklist_"+action, func(t *testing.T) {
|
||||
reader := strings.NewReader(robotsTxt)
|
||||
rules, err := parseRobotsTxt(reader)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse robots.txt: %v", err)
|
||||
}
|
||||
|
||||
oldAction := *userAgentDeny
|
||||
*userAgentDeny = action
|
||||
defer func() { *userAgentDeny = oldAction }()
|
||||
|
||||
anubisRules := convertToAnubisRules(rules)
|
||||
|
||||
// All rules should be blacklist rules with the specified action
|
||||
for _, rule := range anubisRules {
|
||||
if !strings.Contains(rule.Name, "blacklist") {
|
||||
t.Errorf("Expected blacklist rule, got %s", rule.Name)
|
||||
}
|
||||
if rule.Action != action {
|
||||
t.Errorf("Expected action %s, got %s", action, rule.Action)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// compareData performs a deep comparison of two data structures,
|
||||
// ignoring differences that are semantically equivalent in YAML/JSON
|
||||
func compareData(actual, expected interface{}) bool {
|
||||
return reflect.DeepEqual(actual, expected)
|
||||
}
|
||||
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
15
cmd/robots2policy/testdata/blacklist.robots.txt
vendored
@@ -1,15 +0,0 @@
|
||||
# Test with blacklisted user agents
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SpamBot
|
||||
Disallow: /
|
||||
Crawl-delay: 60
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search
|
||||
Crawl-delay: 5
|
||||
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
30
cmd/robots2policy/testdata/blacklist.yaml
vendored
@@ -1,30 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-crawl-delay-4
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SpamBot")
|
||||
name: robots-txt-policy-blacklist-5
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-6
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
30
cmd/robots2policy/testdata/complex.robots.txt
vendored
@@ -1,30 +0,0 @@
|
||||
# Complex real-world example
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private/
|
||||
Disallow: /api/internal/
|
||||
Allow: /api/public/
|
||||
Crawl-delay: 5
|
||||
|
||||
User-agent: Googlebot
|
||||
Disallow: /search/
|
||||
Allow: /api/
|
||||
Crawl-delay: 2
|
||||
|
||||
User-agent: Bingbot
|
||||
Disallow: /search/
|
||||
Disallow: /admin/
|
||||
Crawl-delay: 10
|
||||
|
||||
User-agent: BadBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: SeoBot
|
||||
Disallow: /
|
||||
Crawl-delay: 300
|
||||
|
||||
# Test with various patterns
|
||||
User-agent: TestBot
|
||||
Disallow: /*/admin
|
||||
Disallow: /temp*.html
|
||||
Disallow: /file?.log
|
||||
71
cmd/robots2policy/testdata/complex.yaml
vendored
71
cmd/robots2policy/testdata/complex.yaml
vendored
@@ -1,71 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private/")
|
||||
name: robots-txt-policy-disallow-3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/api/internal/")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Googlebot")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-6
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Bingbot")
|
||||
name: robots-txt-policy-crawl-delay-7
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-9
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
name: robots-txt-policy-blacklist-10
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-crawl-delay-11
|
||||
weight:
|
||||
adjust: 5
|
||||
- action: DENY
|
||||
expression: userAgent.contains("SeoBot")
|
||||
name: robots-txt-policy-blacklist-12
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
name: robots-txt-policy-disallow-13
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
name: robots-txt-policy-disallow-14
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
name: robots-txt-policy-disallow-15
|
||||
@@ -1,25 +0,0 @@
|
||||
# Test consecutive user agents that should be grouped into any: blocks
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
# Multiple consecutive user agents - should be grouped
|
||||
User-agent: BadBot
|
||||
User-agent: SpamBot
|
||||
User-agent: EvilBot
|
||||
Disallow: /
|
||||
|
||||
# Single user agent - should be separate
|
||||
User-agent: GoodBot
|
||||
Disallow: /private
|
||||
|
||||
# Multiple consecutive user agents with crawl delay
|
||||
User-agent: SlowBot1
|
||||
User-agent: SlowBot2
|
||||
Crawl-delay: 5
|
||||
|
||||
# Multiple consecutive user agents with specific path
|
||||
User-agent: SearchBot1
|
||||
User-agent: SearchBot2
|
||||
User-agent: SearchBot3
|
||||
Disallow: /search
|
||||
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
@@ -1,47 +0,0 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("BadBot")
|
||||
- userAgent.contains("SpamBot")
|
||||
- userAgent.contains("EvilBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("GoodBot")
|
||||
- path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("SlowBot1")
|
||||
- userAgent.contains("SlowBot2")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot1")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot2")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot3")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-9
|
||||
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
6
cmd/robots2policy/testdata/custom-name.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: my-custom-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: my-custom-policy-disallow-2
|
||||
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
6
cmd/robots2policy/testdata/deny-action.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: DENY
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: DENY
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
2
cmd/robots2policy/testdata/empty.robots.txt
vendored
@@ -1,2 +0,0 @@
|
||||
# Empty robots.txt (comments only)
|
||||
# No actual rules
|
||||
1
cmd/robots2policy/testdata/empty.yaml
vendored
1
cmd/robots2policy/testdata/empty.yaml
vendored
@@ -1 +0,0 @@
|
||||
[]
|
||||
12
cmd/robots2policy/testdata/simple.json
vendored
12
cmd/robots2policy/testdata/simple.json
vendored
@@ -1,12 +0,0 @@
|
||||
[
|
||||
{
|
||||
"expression": "path.startsWith(\"/admin/\")",
|
||||
"name": "robots-txt-policy-disallow-1",
|
||||
"action": "CHALLENGE"
|
||||
},
|
||||
{
|
||||
"expression": "path.startsWith(\"/private\")",
|
||||
"name": "robots-txt-policy-disallow-2",
|
||||
"action": "CHALLENGE"
|
||||
}
|
||||
]
|
||||
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
5
cmd/robots2policy/testdata/simple.robots.txt
vendored
@@ -1,5 +0,0 @@
|
||||
# Simple robots.txt test
|
||||
User-agent: *
|
||||
Disallow: /admin/
|
||||
Disallow: /private
|
||||
Allow: /public
|
||||
6
cmd/robots2policy/testdata/simple.yaml
vendored
6
cmd/robots2policy/testdata/simple.yaml
vendored
@@ -1,6 +0,0 @@
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-1
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-2
|
||||
@@ -1,6 +0,0 @@
|
||||
# Test wildcard patterns
|
||||
User-agent: *
|
||||
Disallow: /search*
|
||||
Disallow: /*/private
|
||||
Disallow: /file?.txt
|
||||
Disallow: /admin/*?action=delete
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user