#
# robots.txt for http://www.wikipedia.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#
# Please honor crawl-delay
sitemap: /sitemap.xml
# User-agent: bingbot
# User-agent: DuckAssistBot
# User-agent: Google-Extended
# User-agent: Google-Firebase
# User-agent: Google-NotebookLM
# User-agent: GoogleAgent-Mariner
# User-agent: GoogleOther
# User-agent: GoogleOther-Image
# User-agent: GoogleOther-Video
# Please be good.. and just too many badbots to allow the good ones to work
User-agent: ChatGPT Agent
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Gemini-Deep-Research
User-agent: Google-CloudVertexBot
User-agent: Google-Extended
User-agent: GPTBot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
User-agent: meta-webindexer
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: TikTokSpider
User-agent: YandexAdditional
User-agent: YandexAdditionalBot
Disallow: /
User-agent: AddSearchBot
User-agent: AdsBot-Google
User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: AmazonBuyForMe
User-agent: atlassian-bot
User-agent: amazon-kendra-
User-agent: Amazonbot
User-agent: AMZN-User
User-agent: Andibot
User-agent: Anomura
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Awario
User-agent: bedrockbot
User-agent: bigsur.ai
User-agent: Bravebot
User-agent: Brightbot 1.0
User-agent: BuddyBot
User-agent: Bytespider
User-agent: CCBot
User-agent: Cloudflare-AutoRAG
User-agent: CloudVertexBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Datenbank Crawler
User-agent: DeepSeekBot
User-agent: Devin
User-agent: Diffbot
User-agent: Echobot Bot
User-agent: EchoboxBot
User-agent: Factset_spyderbot
User-agent: FirecrawlAgent
User-agent: FriendlyCrawler
User-agent: iaskspider/2.0
User-agent: IbouBot
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: KlaviyoAIBot
User-agent: LinerBot
User-agent: Linguee Bot
User-agent: MistralAI-User
User-agent: MistralAI-User/1.0
User-agent: MyCentralAIScraperBot
User-agent: netEstate Imprint Crawler
User-agent: NotebookLM
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: OpenAI
User-agent: Operator
User-agent: PanguBot
User-agent: Panscient
User-agent: panscient.com
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: PhindBot
User-agent: Poseidon Research Crawler
User-agent: QualifiedBot
User-agent: QuillBot
User-agent: quillbot.com
User-agent: RyteBot
User-agent: SBIntuitionsBot
User-agent: Scrapy
User-agent: SemrushBot
User-agent: SemrushBot-BA
User-agent: SemrushBot-ESI
User-agent: SemrushBot-FT
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SI
User-agent: SemrushBot-SWA
User-agent: SiteAuditBot
User-agent: SplitSignalBot
User-agent: ShapBot
User-agent: Sidetrade indexer bot
User-agent: TerraCotta
User-agent: Thinkbot
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: WARDBot
User-agent: Webzio-Extended
User-agent: wpbot
User-agent: YaK
User-agent: YouBot
Disallow: /
# Observed spamming large amounts of https://en.wikipedia.org/?curid=NNNNNN
# and ignoring 429 ratelimit responses, claims to respect robots:
# http://mj12bot.com/
User-agent: MJ12bot
Disallow: /
# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /
User-agent: DOC
Disallow: /
User-agent: Zao
Disallow: /
# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /
User-agent: Zealbot
Disallow: /
User-agent: MSIECrawler
Disallow: /
User-agent: SiteSnagger
Disallow: /
User-agent: WebStripper
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: Fetch
Disallow: /
User-agent: Offline Explorer
Disallow: /
User-agent: Teleport
Disallow: /
User-agent: TeleportPro
Disallow: /
User-agent: WebZIP
Disallow: /
User-agent: linko
Disallow: /
User-agent: HTTrack
Disallow: /
User-agent: Microsoft.URL.Control
Disallow: /
User-agent: Xenu
Disallow: /
User-agent: larbin
Disallow: /
User-agent: libwww
Disallow: /
User-agent: ZyBORG
Disallow: /
User-agent: Download Ninja
Disallow: /
# Misbehaving: requests much too fast:
User-agent: fast
Disallow: /
#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
User-agent: wget
Disallow: /
#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /
#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /
#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /
# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /
# Allow the Internet Archiver to index action=raw and thereby store the raw wikitext of pages
User-agent: ia_archiver
Allow: /*&action=raw
# Violates crawl-delay
# https://darkvisitors.com/agents/aliyunsecbot
User-agent: AliyunSecBot
Disallow: /
#
# Friendly, low-speed bots are welcome viewing article pages, but not
# dynamically-generated pages please.
#
# Inktomi's "Slurp" can read a minimum delay between hits; if your
# bot supports such a thing using the 'Crawl-delay' or another
# instruction, please let us know.
#
# http://www.google.com/bot.html
User-agent: Googlebot-*
User-agent: Googlebot
User-agent: *
Allow: /w/load.php?
Disallow: /index.php
Disallow: /api/
Disallow: /trap/
Disallow: /w/
Disallow: /wiki?fulltext
Disallow: /wiki/MediaWiki%3A
Disallow: /wiki/MediaWiki:
Disallow: /wiki/Module%3A
Disallow: /wiki/Module:
Disallow: /wiki/Module_talk%3A
Disallow: /wiki/Module_talk:
Disallow: /wiki/Special%3A
Disallow: /wiki/Special:
Disallow: /wiki/Talk%3A
Disallow: /wiki/Talk:
Disallow: /wiki/Template%3A
Disallow: /wiki/Template:
Disallow: /wiki/Template_talk%3A
Disallow: /wiki/Template_talk:
Disallow: /wiki/User%3A
Disallow: /wiki/User:
Disallow: /wiki/User_talk%3A
Disallow: /wiki/User_talk:
Disallow: /w/index.php?diff
Disallow: /*diff*
Disallow: /*search*
Crawl-delay: 10