mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-11 11:08:48 +00:00
* feat: add robots2policy CLI utility to convert robots.txt to Anubis challenge policies
* feat: add documentation for robots2policy CLI tool
* feat: implement crawl delay handling as weight adjustment in Anubis rules
* feat: add various robots.txt and YAML configurations for user agent handling and crawl delays
* test: add comprehensive tests for robots2policy conversion and parsing
* fix: update example URL in usage instructions for robots2policy CLI
* Update metadata
check-spelling run (pull_request) for json/robots2policycli
Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* docs: add crawl delay weight adjustment and deny user agents option to robots2policy CLI
* Update cmd/robots2policy/main.go
Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
* Update cmd/robots2policy/main.go
Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
* fix(robots2policy): use sigs.k8s.io/yaml
Signed-off-by: Xe Iaso <me@xeiaso.net>
* feat(config): properly marshal bot policy rules
Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore(yeetfile): expose robots2policy in libexec
Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(yeetfile): put robots2policy in $PATH
Signed-off-by: Xe Iaso <me@xeiaso.net>
* Update metadata
check-spelling run (pull_request) for json/robots2policycli
Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* style: reorder imports
* refactor: use preexisting structs in config
* fix: correct flag check in main function
* fix: reorder fields in AnubisRule struct for better alignment
* style: improve alignment of struct fields in AnubisRule and OGTagCache
* Update metadata
check-spelling run (pull_request) for json/robots2policycli
Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* fix: add validation for generated Anubis rules from robots.txt
* feat: add batch processing for robots.txt files to generate Anubis CEL policies
* fix: improve usage message and error handling for input file requirement
* refactor: update AnubisRule structure to use ExpressionOrList for improved expression handling
* refactor: reorganize policy definitions in YAML files for consistency and clarity
* fix: correct indentation in blacklist and complex YAML files for consistency
* test: enhance output comparison in robots2policy tests for YAML and JSON formats
* Revert "fix: improve usage message and error handling for input file requirement"
This reverts commit ddcde1f2a3.
* fix: improve usage message and error handling in robots2policy
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
---------
Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
Signed-off-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
Co-authored-by: Xe Iaso <me@xeiaso.net>
216 lines
4.1 KiB
Go
216 lines
4.1 KiB
Go
package config
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"errors"
|
|
"testing"
|
|
|
|
yaml "sigs.k8s.io/yaml/goyaml.v3"
|
|
)
|
|
|
|
func TestExpressionOrListMarshalJSON(t *testing.T) {
|
|
for _, tt := range []struct {
|
|
name string
|
|
input *ExpressionOrList
|
|
output []byte
|
|
err error
|
|
}{
|
|
{
|
|
name: "single expression",
|
|
input: &ExpressionOrList{
|
|
Expression: "true",
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "all",
|
|
input: &ExpressionOrList{
|
|
All: []string{"true", "true"},
|
|
},
|
|
output: []byte(`{"all":["true","true"]}`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "all one",
|
|
input: &ExpressionOrList{
|
|
All: []string{"true"},
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "any",
|
|
input: &ExpressionOrList{
|
|
Any: []string{"true", "false"},
|
|
},
|
|
output: []byte(`{"any":["true","false"]}`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "any one",
|
|
input: &ExpressionOrList{
|
|
Any: []string{"true"},
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
} {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
result, err := json.Marshal(tt.input)
|
|
if !errors.Is(err, tt.err) {
|
|
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
|
|
}
|
|
|
|
if !bytes.Equal(result, tt.output) {
|
|
t.Logf("wanted: %s", string(tt.output))
|
|
t.Logf("got: %s", string(result))
|
|
t.Error("mismatched output")
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestExpressionOrListMarshalYAML(t *testing.T) {
|
|
for _, tt := range []struct {
|
|
name string
|
|
input *ExpressionOrList
|
|
output []byte
|
|
err error
|
|
}{
|
|
{
|
|
name: "single expression",
|
|
input: &ExpressionOrList{
|
|
Expression: "true",
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "all",
|
|
input: &ExpressionOrList{
|
|
All: []string{"true", "true"},
|
|
},
|
|
output: []byte(`all:
|
|
- "true"
|
|
- "true"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "all one",
|
|
input: &ExpressionOrList{
|
|
All: []string{"true"},
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "any",
|
|
input: &ExpressionOrList{
|
|
Any: []string{"true", "false"},
|
|
},
|
|
output: []byte(`any:
|
|
- "true"
|
|
- "false"`),
|
|
err: nil,
|
|
},
|
|
{
|
|
name: "any one",
|
|
input: &ExpressionOrList{
|
|
Any: []string{"true"},
|
|
},
|
|
output: []byte(`"true"`),
|
|
err: nil,
|
|
},
|
|
} {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
result, err := yaml.Marshal(tt.input)
|
|
if !errors.Is(err, tt.err) {
|
|
t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
|
|
}
|
|
|
|
result = bytes.TrimSpace(result)
|
|
|
|
if !bytes.Equal(result, tt.output) {
|
|
t.Logf("wanted: %q", string(tt.output))
|
|
t.Logf("got: %q", string(result))
|
|
t.Error("mismatched output")
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestExpressionOrListUnmarshalJSON(t *testing.T) {
|
|
for _, tt := range []struct {
|
|
err error
|
|
validErr error
|
|
result *ExpressionOrList
|
|
name string
|
|
inp string
|
|
}{
|
|
{
|
|
name: "simple",
|
|
inp: `"\"User-Agent\" in headers"`,
|
|
result: &ExpressionOrList{
|
|
Expression: `"User-Agent" in headers`,
|
|
},
|
|
},
|
|
{
|
|
name: "object-and",
|
|
inp: `{
|
|
"all": ["\"User-Agent\" in headers"]
|
|
}`,
|
|
result: &ExpressionOrList{
|
|
All: []string{
|
|
`"User-Agent" in headers`,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "object-or",
|
|
inp: `{
|
|
"any": ["\"User-Agent\" in headers"]
|
|
}`,
|
|
result: &ExpressionOrList{
|
|
Any: []string{
|
|
`"User-Agent" in headers`,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
name: "both-or-and",
|
|
inp: `{
|
|
"all": ["\"User-Agent\" in headers"],
|
|
"any": ["\"User-Agent\" in headers"]
|
|
}`,
|
|
validErr: ErrExpressionCantHaveBoth,
|
|
},
|
|
{
|
|
name: "expression-empty",
|
|
inp: `{
|
|
"any": []
|
|
}`,
|
|
validErr: ErrExpressionEmpty,
|
|
},
|
|
} {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
var eol ExpressionOrList
|
|
|
|
if err := json.Unmarshal([]byte(tt.inp), &eol); !errors.Is(err, tt.err) {
|
|
t.Errorf("wanted unmarshal error: %v but got: %v", tt.err, err)
|
|
}
|
|
|
|
if tt.result != nil && !eol.Equal(tt.result) {
|
|
t.Logf("wanted: %#v", tt.result)
|
|
t.Logf("got: %#v", &eol)
|
|
t.Fatal("parsed expression is not what was expected")
|
|
}
|
|
|
|
if err := eol.Valid(); !errors.Is(err, tt.validErr) {
|
|
t.Errorf("wanted validation error: %v but got: %v", tt.err, err)
|
|
}
|
|
})
|
|
}
|
|
}
|