From 24857f430f5236edd770c7ae6377eeda21a21606 Mon Sep 17 00:00:00 2001
From: Marielle Volz
Date: Fri, 20 Mar 2026 11:13:26 +0000
Subject: [PATCH] feat(data): add Citoid to good bots list (#1524)

* Add Wikimedia Foundation citoid services file

Wikimedia Foundation runs a service called citoid which retrieves
citation metadata from urls in order to create formatted citations.
This file contains the ip ranges allocated to the WMF
(https://wikitech.wikimedia.org/wiki/IP_and_AS_allocations) from which
the services make requests, as well as regex for the User-Agents from
both services used to generate citations (citoid, and Zotero's
translation-server which citoid makes requests to as well in order to
generate the metadata).

Signed-off-by: Marielle Volz

* Add Wikimedia Citoid crawler to allowed list

Signed-off-by: Marielle Volz

* chore: update spelling

Signed-off-by: Xe Iaso

---------

Signed-off-by: Marielle Volz
Signed-off-by: Xe Iaso
Co-authored-by: Xe Iaso
---
 .github/actions/spelling/allow.txt  |  2 ++
 data/crawlers/_allow-good.yaml      |  1 +
 data/crawlers/wikimedia-citoid.yaml | 18 ++++++++++++++++++
 3 files changed, 21 insertions(+)
 create mode 100644 data/crawlers/wikimedia-citoid.yaml

diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt
index 8d4b5c3d..9718e5b3 100644
--- a/.github/actions/spelling/allow.txt
+++ b/.github/actions/spelling/allow.txt
@@ -31,3 +31,5 @@ Stargate
 FFXIV
 uvensys
 de
+envoyproxy
+unipromos
diff --git a/data/crawlers/_allow-good.yaml b/data/crawlers/_allow-good.yaml
index 44a6fed6..98f6fdca 100644
--- a/data/crawlers/_allow-good.yaml
+++ b/data/crawlers/_allow-good.yaml
@@ -8,4 +8,5 @@
 - import: (data)/crawlers/marginalia.yaml
 - import: (data)/crawlers/mojeekbot.yaml
 - import: (data)/crawlers/commoncrawl.yaml
+- import: (data)/crawlers/wikimedia-citoid.yaml
 - import: (data)/crawlers/yandexbot.yaml
diff --git a/data/crawlers/wikimedia-citoid.yaml b/data/crawlers/wikimedia-citoid.yaml
new file mode 100644
index 00000000..e5d4ede7
--- /dev/null
+++ b/data/crawlers/wikimedia-citoid.yaml
@@ -0,0 +1,18 @@
+# Wikimedia Foundation citation services
+# https://www.mediawiki.org/wiki/Citoid
+
+- name: wikimedia-citoid
+  user_agent_regex: "Citoid/WMF"
+  action: ALLOW
+  remote_addresses: [
+    "208.80.152.0/22",
+    "2620:0:860::/46",
+  ]
+
+- name: wikimedia-zotero-translation-server
+  user_agent_regex: "ZoteroTranslationServer/WMF"
+  action: ALLOW
+  remote_addresses: [
+    "208.80.152.0/22",
+    "2620:0:860::/46",
+  ]
\ No newline at end of file