# robots.txt for wikiHow
# based on wikipedia.org's robots.txt
#
# Some of the crawlers listed below are kind enough to obey robots.txt, but
# we'd rather not have them unless they're feeding search engines.
# Sitemap: https://www.wikihow.com/sitemap_index.xml
# Or: /sitemap.xml on some other domains.
#
# If your bot supports rate limiting via the 'Crawl-delay' or another
# instruction, we can add it to our robots.txt.
#
# Friendly, low-speed bots are welcome viewing article pages, but not
# dynamically-generated pages please. Article pages contain our site's
# real content.
#
# NOTICE: All crawlers and bots, regardless of whether or not they are
# specified below, are strictly prohibited from using our content for the
# purposes of training or retrieval for AI models or similar machine learning
# systems, except where explicit prior permission has been granted by wikiHow
# through a contractual licensing agreement. Any use of our content for such
# purposes without our explicit contractual permission is a violation of our
# terms of service and intellectual property rights.
# ---------------------------------------------------------------------
# Blocked crawlers. Every group from here down to the AdsBot-Google
# group denies the whole site ("Disallow: /"), with one exception:
# archive.org, which is only kept out of api.php, index.php and Special:.
# ---------------------------------------------------------------------
# Ai2Bot and AI2Bot differ only in letter case; both spellings are listed.
User-agent: Ai2Bot
Disallow: /
User-agent: AI2Bot
Disallow: /
User-agent: Ai2Bot-Dolma
Disallow: /
User-agent: Amazonbot
Disallow: /
User-agent: anthropic-ai
Disallow: /
# Partial block: archive.org is not denied the whole site, only api.php,
# index.php and any path that starts with /Special:.
# NOTE(review): the Internet Archive's crawler is believed to identify
# itself as "archive.org_bot" - confirm that this token actually matches it.
User-agent: archive.org
Disallow: /api.php
Disallow: /index.php
Disallow: /Special:
User-agent: Applebot-Extended
Disallow: /
User-agent: Bytespider
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: Claude-SearchBot
Disallow: /
User-agent: Claude-User
Disallow: /
User-agent: Claude-Web
Disallow: /
User-agent: cohere-ai
Disallow: /
User-agent: Diffbot
Disallow: /
User-agent: DOC
Disallow: /
User-agent: Download Ninja
Disallow: /
User-agent: DuckAssistBot
Disallow: /
User-agent: FacebookBot
Disallow: /
User-agent: Facebookexternalhit
Disallow: /
User-agent: FriendlyCrawler
Disallow: /
User-agent: Fetch
Disallow: /
User-agent: GPTBot
Disallow: /
User-agent: GrokApp
Disallow: /
User-agent: HMSE_Robot
Disallow: /
User-agent: HTTrack
Disallow: /
User-agent: ia_archiver
Disallow: /
User-agent: ICC-Crawler
Disallow: /
User-agent: ImagesiftBot
Disallow: /
User-agent: img2dataset
Disallow: /
User-agent: k2spider
Disallow: /
User-agent: larbin
Disallow: /
User-agent: libwww
Disallow: /
User-agent: linko
Disallow: /
User-agent: Meta-ExternalAgent
Disallow: /
User-agent: Meta-ExternalFetcher
Disallow: /
User-agent: Microsoft.URL.Control
Disallow: /
User-agent: MistralAI-User
Disallow: /
User-agent: MSIECrawler
Disallow: /
# NPBot requests many pages per second.
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /
User-agent: OAI-SearchBot
Disallow: /
User-agent: Offline Explorer
Disallow: /
# Webz.io (formerly Omgili) crawlers, denied the whole site. The crawler's
# own token is spelled "omgilibot"; the misspelled "OmigiliBot" entry is
# kept so that nothing which matched before stops matching.
User-agent: omgili
Disallow: /
User-agent: omgilibot
Disallow: /
User-agent: OmigiliBot
Disallow: /
# Each group from here through ZyBORG denies the whole site ("Disallow: /").
User-agent: PerplexityBot
Disallow: /
User-agent: PetalBot
Disallow: /
User-agent: Scrapy
Disallow: /
User-agent: Seekr
Disallow: /
# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /
User-agent: SiteSnagger
Disallow: /
User-agent: Teleport
Disallow: /
User-agent: TeleportPro
Disallow: /
User-agent: TikTokSpider
Disallow: /
User-agent: Timpibot
Disallow: /
User-agent: UbiCrawler
Disallow: /
User-agent: VelenPublicWebCrawler
Disallow: /
User-agent: WebCopier
Disallow: /
User-agent: WebReaper
Disallow: /
User-agent: WebStripper
Disallow: /
User-agent: Webzio-Extended
Disallow: /
User-agent: WebZIP
Disallow: /
# wget in recursive mode uses too many resources for us.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance. Please wait 3 seconds between each request.
# The rule below denies wget the whole site.
User-agent: wget
Disallow: /
User-agent: Xenu
Disallow: /
User-agent: YouBot
Disallow: /
User-agent: Zao
Disallow: /
User-agent: Zealbot
Disallow: /
User-agent: ZyBORG
Disallow: /
# ---------------------------------------------------------------------
# Google crawlers. Each has its own group, and a crawler that matches a
# named group follows that group instead of the catch-all "*" group.
# ---------------------------------------------------------------------
# Google's ad crawlers: no path is restricted.
User-agent: AdsBot-Google
Allow: /
User-agent: Mediapartners-Google
Allow: /
# Googlebot: everything is allowed except paths starting with /wikiHowTo.
# The three Special: Allow lines are already covered by "Allow: /".
# NOTE(review): this group does not repeat the "*" group's Disallow rules
# (index.php, Special:, action=history, platform=, ...), so those URLs are
# open to Googlebot - confirm that this is intended.
User-agent: Googlebot
Allow: /Special:NewPages
Allow: /Special:Sitemap
Allow: /Special:CategoryListing
Allow: /
Disallow: /wikiHowTo
# ---------------------------------------------------------------------
# Default rules for every crawler that has no group of its own.
#
# The Allow lines are exceptions to the broader Disallow lines further
# down (/Special: and /index.php). Keep the Allow lines first: parsers
# that apply the first matching rule depend on this order, while
# longest-match parsers (RFC 9309) reach the same result in any order.
# "*" inside a path matches any run of characters.
# ---------------------------------------------------------------------
User-agent: *
# Special: pages that remain crawlable despite "Disallow: /Special:" below.
# Categorylisting / CategoryListing differ only in case; both are listed
# because paths are matched case-sensitively.
Allow: /Special:Block
Allow: /Special:BlockList
Allow: /Special:Categorylisting
Allow: /Special:CategoryListing
Allow: /Special:Charity
Allow: /Special:EmailUser
Allow: /Special:LSearch
Allow: /Special:NewPages
Allow: /Special:QABox
Allow: /Special:SearchAd
Allow: /Special:Sitemap
Allow: /Special:ThankAuthors
Allow: /Special:UserLogin
# index.php URLs that remain crawlable despite "Disallow: /index.php" below.
Allow: /index.php?*action=credits
Allow: /index.php?*MathShowImage
Allow: /index.php?*printable
# Everything else under index.php is off limits.
Disallow: /index.php
# Any URL containing these substrings is off limits.
Disallow: /*feed=rss
Disallow: /*action=delete
Disallow: /*action=history
# All Special: pages not explicitly allowed above.
Disallow: /Special:
Disallow: /*platform=
Disallow: /*variant=
Disallow: /wikiHowTo