diff --git a/data/crawlers/ai-search.yaml b/data/crawlers/ai-search.yaml index bf1bdd2c..cf60dc0d 100644 --- a/data/crawlers/ai-search.yaml +++ b/data/crawlers/ai-search.yaml @@ -4,5 +4,5 @@ # - Claude-SearchBot: No published IP allowlist - name: "ai-crawlers-search" user_agent_regex: >- - OAI-SearchBot|Claude-SearchBot|PerplexityBot + OAI-SearchBot|Claude-SearchBot|PerplexityBot|meta-webindexer action: DENY diff --git a/web/static/robots.txt b/web/static/robots.txt index e5c518fc..c805b2a8 100644 --- a/web/static/robots.txt +++ b/web/static/robots.txt @@ -54,6 +54,7 @@ User-agent: meta-externalagent User-agent: Meta-ExternalAgent User-agent: meta-externalfetcher User-agent: Meta-ExternalFetcher +User-agent: meta-webindexer User-agent: MistralAI-User User-agent: MistralAI-User/1.0 User-agent: MyCentralAIScraperBot