From f21706eb1284f70f76fa8182e8146355765375e1 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 21 Apr 2026 22:56:23 +0200 Subject: [PATCH] feat(data): add Meta's web indexer used for AI purposes (#1573) This indexer is documented at https://developers.facebook.com/docs/sharing/webmasters/web-crawlers. I saw it crawling the entirety of my Forgejo instance, so I suggest blocking it widely. Signed-off-by: Benjamin Bouvier --- data/crawlers/ai-search.yaml | 2 +- web/static/robots.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/crawlers/ai-search.yaml b/data/crawlers/ai-search.yaml index bf1bdd2c..cf60dc0d 100644 --- a/data/crawlers/ai-search.yaml +++ b/data/crawlers/ai-search.yaml @@ -4,5 +4,5 @@ # - Claude-SearchBot: No published IP allowlist - name: "ai-crawlers-search" user_agent_regex: >- - OAI-SearchBot|Claude-SearchBot|PerplexityBot + OAI-SearchBot|Claude-SearchBot|PerplexityBot|meta-webindexer action: DENY diff --git a/web/static/robots.txt b/web/static/robots.txt index e5c518fc..c805b2a8 100644 --- a/web/static/robots.txt +++ b/web/static/robots.txt @@ -54,6 +54,7 @@ User-agent: meta-externalagent User-agent: Meta-ExternalAgent User-agent: meta-externalfetcher User-agent: Meta-ExternalFetcher +User-agent: meta-webindexer User-agent: MistralAI-User User-agent: MistralAI-User/1.0 User-agent: MyCentralAIScraperBot