Mirror of https://github.com/TecharoHQ/anubis.git, synced 2026-04-10 10:38:45 +00:00
feat: add various robots.txt and YAML configurations for user agent handling and crawl delays
@@ -1,356 +0,0 @@
package main

import (
	"bytes"
	"encoding/json"
	"net/http"
	"os"
	"strings"
	"testing"
	"time"

	"gopkg.in/yaml.v3"
)

// Test URLs for real robots.txt files
var testRobotsURLs = []struct {
	name string
	url  string
	desc string
}{
	{"Google", "https://www.google.com/robots.txt", "Google's comprehensive robots.txt with many disallows"},
	{"Apple", "https://www.apple.com/robots.txt", "Apple's robots.txt with product-specific rules"},
	{"GitHub", "https://github.com/robots.txt", "GitHub's developer-focused robots.txt"},
	{"Reddit", "https://www.reddit.com/robots.txt", "Reddit's social media robots.txt"},
	{"Wikipedia", "https://en.wikipedia.org/robots.txt", "Wikipedia's educational content robots.txt"},
	{"Twitter", "https://twitter.com/robots.txt", "Twitter's social platform robots.txt"},
	{"Facebook", "https://www.facebook.com/robots.txt", "Facebook's social network robots.txt"},
	{"LinkedIn", "https://www.linkedin.com/robots.txt", "LinkedIn's professional network robots.txt"},
	{"Amazon", "https://www.amazon.com/robots.txt", "Amazon's e-commerce robots.txt"},
	{"Microsoft", "https://www.microsoft.com/robots.txt", "Microsoft's corporate robots.txt"},
}

func TestRealRobotsTxtConversion(t *testing.T) {
	if os.Getenv("DONT_USE_NETWORK") != "" {
		t.Skip("test requires network egress")
	}

	for _, test := range testRobotsURLs {
		t.Run(test.name, func(t *testing.T) {
			// Fetch robots.txt
			client := &http.Client{Timeout: 10 * time.Second}
			resp, err := client.Get(test.url)
			if err != nil {
				t.Skipf("Failed to fetch %s: %v", test.url, err)
			}
			defer resp.Body.Close()

			if resp.StatusCode != http.StatusOK {
				t.Skipf("Non-200 status for %s: %d", test.url, resp.StatusCode)
			}

			// Parse robots.txt
			rules, err := parseRobotsTxt(resp.Body)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt from %s: %v", test.url, err)
			}

			// Convert to Anubis rules
			oldPolicyName := *policyName
			*policyName = test.name + "-policy"
			defer func() { *policyName = oldPolicyName }()

			anubisRules := convertToAnubisRules(rules)

			// Validate we got some rules
			if len(anubisRules) == 0 {
				t.Errorf("No rules generated for %s", test.name)
				return
			}

			// Test YAML output
			yamlOutput, err := yaml.Marshal(anubisRules)
			if err != nil {
				t.Fatalf("Failed to marshal YAML for %s: %v", test.name, err)
			}

			if len(yamlOutput) == 0 {
				t.Errorf("Empty YAML output for %s", test.name)
			}

			// Test JSON output
			jsonOutput, err := json.MarshalIndent(anubisRules, "", " ")
			if err != nil {
				t.Fatalf("Failed to marshal JSON for %s: %v", test.name, err)
			}

			if len(jsonOutput) == 0 {
				t.Errorf("Empty JSON output for %s", test.name)
			}

			// Validate rule structure
			for i, rule := range anubisRules {
				if rule.Name == "" {
					t.Errorf("Rule %d has empty name for %s", i, test.name)
				}

				if rule.Action == "" {
					t.Errorf("Rule %d has empty action for %s", i, test.name)
				}

				validActions := map[string]bool{
					"ALLOW": true, "DENY": true, "CHALLENGE": true, "WEIGH": true,
				}
				if !validActions[rule.Action] {
					t.Errorf("Rule %d has invalid action '%s' for %s", i, rule.Action, test.name)
				}

				// Check that CEL expressions exist
				if rule.Expression == nil {
					t.Errorf("Rule %d has no expression for %s", i, test.name)
				}
			}

			t.Logf("Successfully converted %s: %d robots.txt rules -> %d Anubis rules",
				test.name, len(rules), len(anubisRules))
		})
	}
}

func TestCELExpressionGeneration(t *testing.T) {
	tests := []struct {
		name       string
		robotsPath string
		expected   string
	}{
		{
			name:       "simple path",
			robotsPath: "/admin",
			expected:   `path.startsWith("/admin")`,
		},
		{
			name:       "path with trailing slash",
			robotsPath: "/admin/",
			expected:   `path.startsWith("/admin/")`,
		},
		{
			name:       "wildcard path",
			robotsPath: "/search*",
			expected:   `path.matches("^/search.*")`,
		},
		{
			name:       "complex wildcard",
			robotsPath: "/*/wiki/*?action=*",
			expected:   `path.matches("^/.*/wiki/.*.action=.*")`,
		},
		{
			name:       "question mark wildcard",
			robotsPath: "/file?.txt",
			expected:   `path.matches("^/file.\\.txt")`,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			result := buildPathCondition(test.robotsPath)
			if result != test.expected {
				t.Errorf("Expected %q, got %q", test.expected, result)
			}
		})
	}
}

func TestRobotsTxtParsing(t *testing.T) {
	robotsTxt := `# Example robots.txt
User-agent: *
Disallow: /admin/
Disallow: /private
Allow: /public

User-agent: Googlebot
Disallow: /search
Crawl-delay: 10

User-agent: BadBot
Disallow: /

# Rogue bots
User-agent: SpamBot
Disallow: /`

	reader := strings.NewReader(robotsTxt)
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse test robots.txt: %v", err)
	}

	expectedRules := 4 // *, Googlebot, BadBot, SpamBot
	if len(rules) != expectedRules {
		t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
	}

	// Check universal rules
	universalRule := rules[0]
	if universalRule.UserAgent != "*" {
		t.Errorf("Expected universal user agent '*', got %q", universalRule.UserAgent)
	}

	if len(universalRule.Disallows) != 2 {
		t.Errorf("Expected 2 disallows for universal rule, got %d", len(universalRule.Disallows))
	}

	if len(universalRule.Allows) != 1 {
		t.Errorf("Expected 1 allow for universal rule, got %d", len(universalRule.Allows))
	}

	// Check Googlebot rules
	googlebotRule := rules[1]
	if googlebotRule.UserAgent != "Googlebot" {
		t.Errorf("Expected Googlebot user agent, got %q", googlebotRule.UserAgent)
	}

	if googlebotRule.CrawlDelay != 10 {
		t.Errorf("Expected crawl delay 10, got %d", googlebotRule.CrawlDelay)
	}

	// Check blacklisted bots
	badBotRule := rules[2]
	if !badBotRule.IsBlacklist {
		t.Errorf("BadBot should be marked as blacklisted")
	}

	spamBotRule := rules[3]
	if !spamBotRule.IsBlacklist {
		t.Errorf("SpamBot should be marked as blacklisted")
	}
}

func TestAnubisRuleGeneration(t *testing.T) {
	// Test with simple robots.txt
	robotsRules := []RobotsRule{
		{
			UserAgent: "*",
			Disallows: []string{"/admin", "/private"},
			Allows:    []string{"/public"},
		},
		{
			UserAgent:   "BadBot",
			Disallows:   []string{"/"},
			IsBlacklist: true,
		},
	}

	oldPolicyName := *policyName
	*policyName = "test-policy"
	defer func() { *policyName = oldPolicyName }()

	oldBaseAction := *baseAction
	*baseAction = "CHALLENGE"
	defer func() { *baseAction = oldBaseAction }()

	oldUserAgentDeny := *userAgentDeny
	*userAgentDeny = "DENY"
	defer func() { *userAgentDeny = oldUserAgentDeny }()

	anubisRules := convertToAnubisRules(robotsRules)

	// Should have 3 rules: 2 disallows + 1 blacklist
	expectedRules := 3
	if len(anubisRules) != expectedRules {
		t.Errorf("Expected %d Anubis rules, got %d", expectedRules, len(anubisRules))
	}

	// Check first disallow rule
	firstRule := anubisRules[0]
	if firstRule.Action != "CHALLENGE" {
		t.Errorf("Expected CHALLENGE action, got %s", firstRule.Action)
	}

	if !strings.Contains(firstRule.Name, "disallow") {
		t.Errorf("Expected disallow in rule name, got %s", firstRule.Name)
	}

	// Check blacklist rule
	var blacklistRule *AnubisRule
	for _, rule := range anubisRules {
		if strings.Contains(rule.Name, "blacklist") {
			blacklistRule = &rule
			break
		}
	}

	if blacklistRule == nil {
		t.Errorf("No blacklist rule found")
	} else {
		if blacklistRule.Action != "DENY" {
			t.Errorf("Expected DENY action for blacklist, got %s", blacklistRule.Action)
		}
	}
}

func TestEmptyRobotsTxt(t *testing.T) {
	reader := strings.NewReader("")
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse empty robots.txt: %v", err)
	}

	if len(rules) != 0 {
		t.Errorf("Expected 0 rules for empty robots.txt, got %d", len(rules))
	}

	anubisRules := convertToAnubisRules(rules)
	if len(anubisRules) != 0 {
		t.Errorf("Expected 0 Anubis rules for empty robots.txt, got %d", len(anubisRules))
	}
}

func TestCommentsOnlyRobotsTxt(t *testing.T) {
	robotsTxt := `# This is a comment
# Another comment
# User-agent: * (commented out)
# Disallow: / (commented out)`

	reader := strings.NewReader(robotsTxt)
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse comments-only robots.txt: %v", err)
	}

	if len(rules) != 0 {
		t.Errorf("Expected 0 rules for comments-only robots.txt, got %d", len(rules))
	}
}

func TestMalformedRobotsTxt(t *testing.T) {
	robotsTxt := `User-agent: *
Disallow /admin (missing colon)
Allow: /public
Random line without colon
User-agent
Disallow: /test`

	reader := strings.NewReader(robotsTxt)
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse malformed robots.txt: %v", err)
	}

	// Should still parse the valid parts
	if len(rules) == 0 {
		t.Errorf("Expected some rules despite malformed input, got 0")
	}

	// Should have at least the Allow rule
	foundAllow := false
	for _, rule := range rules {
		if len(rule.Allows) > 0 {
			foundAllow = true
			break
		}
	}

	if !foundAllow {
		t.Errorf("Expected to find Allow rule in malformed robots.txt")
	}
}
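Note: the expectations in TestCELExpressionGeneration above pin down how robots.txt path patterns become CEL conditions: plain prefixes map to path.startsWith(...), while paths containing '*' or '?' become an anchored path.matches(...) regex with literal dots escaped. The following is only a minimal sketch of a conversion that satisfies those expectations; the function name is hypothetical and this is not the shipped buildPathCondition implementation.

// Sketch only: a wildcard-to-CEL conversion consistent with the test table
// above. buildPathConditionSketch is a hypothetical name, not the real helper.
package main

import (
	"fmt"
	"strings"
)

func buildPathConditionSketch(robotsPath string) string {
	// No robots.txt wildcards: a simple prefix check is enough.
	if !strings.ContainsAny(robotsPath, "*?") {
		return fmt.Sprintf("path.startsWith(%q)", robotsPath)
	}
	var re strings.Builder
	re.WriteString("^") // anchor at the start of the request path
	for _, r := range robotsPath {
		switch r {
		case '*':
			re.WriteString(".*") // '*' matches any run of characters
		case '?':
			re.WriteString(".") // '?' matches exactly one character
		case '.':
			re.WriteString(`\.`) // keep literal dots literal in the regex
		default:
			re.WriteRune(r)
		}
	}
	// %q re-escapes the backslash so the result is valid CEL source,
	// e.g. /file?.txt -> path.matches("^/file.\\.txt").
	return fmt.Sprintf("path.matches(%q)", re.String())
}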
15 cmd/robots2policy/testdata/blacklist.robots.txt vendored Normal file
@@ -0,0 +1,15 @@
# Test with blacklisted user agents
User-agent: *
Disallow: /admin
Crawl-delay: 10

User-agent: BadBot
Disallow: /

User-agent: SpamBot
Disallow: /
Crawl-delay: 60

User-agent: Googlebot
Disallow: /search
Crawl-delay: 5
36 cmd/robots2policy/testdata/blacklist.yaml vendored Normal file
@@ -0,0 +1,36 @@
- name: robots-txt-policy-crawl-delay-1
  action: WEIGH
  expression:
    single: "true"
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin")
- name: robots-txt-policy-blacklist-3
  action: DENY
  expression:
    single: userAgent.contains("BadBot")
- name: robots-txt-policy-crawl-delay-4
  action: WEIGH
  expression:
    single: userAgent.contains("SpamBot")
  weight:
    adjust: 3
- name: robots-txt-policy-blacklist-5
  action: DENY
  expression:
    single: userAgent.contains("SpamBot")
- name: robots-txt-policy-crawl-delay-6
  action: WEIGH
  expression:
    single: userAgent.contains("Googlebot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-7
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search")
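The testdata pairs in this commit (an input .robots.txt next to its expected .yaml, such as blacklist.robots.txt and blacklist.yaml above) read like golden files. As an illustration only, here is how one such pair could be exercised with the helpers named in the test file above (parseRobotsTxt, convertToAnubisRules); the test name is made up, it assumes the default flag values that produce the robots-txt-policy prefix, and it is not necessarily how the repository's own tests compare these fixtures. It would live in the same package as the test file above and reuse its imports (bytes, os, testing, yaml).

// Sketch of a golden-file check over one fixture pair. Hypothetical test
// name; the real tests in cmd/robots2policy may compare these differently.
func TestBlacklistGoldenSketch(t *testing.T) {
	in, err := os.Open("testdata/blacklist.robots.txt")
	if err != nil {
		t.Fatal(err)
	}
	defer in.Close()

	rules, err := parseRobotsTxt(in)
	if err != nil {
		t.Fatal(err)
	}

	// Assumes the default policy-name/action flags, which is what the
	// robots-txt-policy-* names in blacklist.yaml suggest.
	got, err := yaml.Marshal(convertToAnubisRules(rules))
	if err != nil {
		t.Fatal(err)
	}

	want, err := os.ReadFile("testdata/blacklist.yaml")
	if err != nil {
		t.Fatal(err)
	}

	if !bytes.Equal(bytes.TrimSpace(got), bytes.TrimSpace(want)) {
		t.Errorf("generated YAML does not match testdata/blacklist.yaml:\ngot:\n%s", got)
	}
}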
30 cmd/robots2policy/testdata/complex.robots.txt vendored Normal file
@@ -0,0 +1,30 @@
# Complex real-world example
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /api/internal/
Allow: /api/public/
Crawl-delay: 5

User-agent: Googlebot
Disallow: /search/
Allow: /api/
Crawl-delay: 2

User-agent: Bingbot
Disallow: /search/
Disallow: /admin/
Crawl-delay: 10

User-agent: BadBot
Disallow: /

User-agent: SeoBot
Disallow: /
Crawl-delay: 300

# Test with various patterns
User-agent: TestBot
Disallow: /*/admin
Disallow: /temp*.html
Disallow: /file?.log
80 cmd/robots2policy/testdata/complex.yaml vendored Normal file
@@ -0,0 +1,80 @@
- name: robots-txt-policy-crawl-delay-1
  action: WEIGH
  expression:
    single: "true"
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-3
  action: CHALLENGE
  expression:
    single: path.startsWith("/private/")
- name: robots-txt-policy-disallow-4
  action: CHALLENGE
  expression:
    single: path.startsWith("/api/internal/")
- name: robots-txt-policy-crawl-delay-5
  action: WEIGH
  expression:
    single: userAgent.contains("Googlebot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-6
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search/")
- name: robots-txt-policy-crawl-delay-7
  action: WEIGH
  expression:
    single: userAgent.contains("Bingbot")
  weight:
    adjust: 3
- name: robots-txt-policy-disallow-8
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/search/")
- name: robots-txt-policy-disallow-9
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/admin/")
- name: robots-txt-policy-blacklist-10
  action: DENY
  expression:
    single: userAgent.contains("BadBot")
- name: robots-txt-policy-crawl-delay-11
  action: WEIGH
  expression:
    single: userAgent.contains("SeoBot")
  weight:
    adjust: 3
- name: robots-txt-policy-blacklist-12
  action: DENY
  expression:
    single: userAgent.contains("SeoBot")
- name: robots-txt-policy-disallow-13
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/.*/admin")
- name: robots-txt-policy-disallow-14
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/temp.*\\.html")
- name: robots-txt-policy-disallow-15
  action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/file.\\.log")
8 cmd/robots2policy/testdata/custom-name.yaml vendored Normal file
@@ -0,0 +1,8 @@
- name: my-custom-policy-disallow-1
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin/")
- name: my-custom-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/private")
8 cmd/robots2policy/testdata/deny-action.yaml vendored Normal file
@@ -0,0 +1,8 @@
- name: robots-txt-policy-disallow-1
  action: DENY
  expression:
    single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-2
  action: DENY
  expression:
    single: path.startsWith("/private")
2 cmd/robots2policy/testdata/empty.robots.txt vendored Normal file
@@ -0,0 +1,2 @@
# Empty robots.txt (comments only)
# No actual rules
1 cmd/robots2policy/testdata/empty.yaml vendored Normal file
@@ -0,0 +1 @@
[]
16 cmd/robots2policy/testdata/simple.json vendored Normal file
@@ -0,0 +1,16 @@
[
  {
    "name": "robots-txt-policy-disallow-1",
    "action": "CHALLENGE",
    "expression": {
      "single": "path.startsWith(\"/admin/\")"
    }
  },
  {
    "name": "robots-txt-policy-disallow-2",
    "action": "CHALLENGE",
    "expression": {
      "single": "path.startsWith(\"/private\")"
    }
  }
]
5 cmd/robots2policy/testdata/simple.robots.txt vendored Normal file
@@ -0,0 +1,5 @@
# Simple robots.txt test
User-agent: *
Disallow: /admin/
Disallow: /private
Allow: /public
8 cmd/robots2policy/testdata/simple.yaml vendored Normal file
@@ -0,0 +1,8 @@
- name: robots-txt-policy-disallow-1
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/private")
6 cmd/robots2policy/testdata/wildcards.robots.txt vendored Normal file
@@ -0,0 +1,6 @@
# Test wildcard patterns
User-agent: *
Disallow: /search*
Disallow: /*/private
Disallow: /file?.txt
Disallow: /admin/*?action=delete
16 cmd/robots2policy/testdata/wildcards.yaml vendored Normal file
@@ -0,0 +1,16 @@
- name: robots-txt-policy-disallow-1
  action: CHALLENGE
  expression:
    single: path.matches("^/search.*")
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.matches("^/.*/private")
- name: robots-txt-policy-disallow-3
  action: CHALLENGE
  expression:
    single: path.matches("^/file.\\.txt")
- name: robots-txt-policy-disallow-4
  action: CHALLENGE
  expression:
    single: path.matches("^/admin/.*.action=delete")
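The single strings in these fixtures are CEL expressions over request attributes such as path and userAgent. As a rough standalone sanity check only, one of the generated wildcard expressions can be compiled and evaluated with the google/cel-go package; declaring "path" as a plain string variable here is an assumption made so the snippet runs on its own, since Anubis binds these variables itself.

// Standalone sketch: compile and evaluate one expression from wildcards.yaml
// with cel-go. This is an illustration, not part of the Anubis codebase.
package main

import (
	"fmt"

	"github.com/google/cel-go/cel"
)

func main() {
	// Assumption: treat "path" as a string variable for this check.
	env, err := cel.NewEnv(cel.Variable("path", cel.StringType))
	if err != nil {
		panic(err)
	}

	// '?' in the robots.txt pattern became '.', and the literal dot is
	// escaped inside the CEL string literal.
	ast, issues := env.Compile(`path.matches("^/file.\\.txt")`)
	if issues != nil && issues.Err() != nil {
		panic(issues.Err())
	}

	prg, err := env.Program(ast)
	if err != nil {
		panic(err)
	}

	out, _, err := prg.Eval(map[string]any{"path": "/file1.txt"})
	if err != nil {
		panic(err)
	}
	fmt.Println(out.Value()) // true: '1' satisfies the single-character wildcard
}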