# Block TurnitinBot from the entire site.
User-agent: TurnitinBot
Disallow: /

# Default policy for every crawler not matched by a more specific record.
# Keep all "*" rules in this one record: the original robots exclusion
# standard permits only a single "*" record, and parsers that honor just the
# first one silently ignore rules placed in a later duplicate.
# NOTE(review): the conventional directory name is "cgi-bin"; confirm that
# "cgi_bin" below is really the intended path.
# NOTE(review): "/logs" has no trailing slash, so it also matches any path
# that merely starts with "/logs" (e.g. /logs.html); confirm that is intended.
User-agent: *
Disallow: /logs
Disallow: /stats/
Disallow: /cgi_bin/
Disallow: /private/
Disallow: /temp/
Disallow: /gallery/view_photo_properties.php

# Block MSNbot from the entire site.
User-agent: MSNbot
Disallow: /
# domo arigato mr. roboto
# since Google keeps hitting 404 urls and won't stop..
#User-Agent: *
#Disallow: /cgi-bin
# much of the following taken from http://pigdog.org/robots.txt.
# Plagiarism-detection crawler; see
# http://www.plagiarism.org/crawler/robotinfo.html
User-agent: SlySearch
Disallow: /

# Assorted unwanted bots.
User-agent: arianna.iol.it Linux/2.2.17-14smp (linux)
Disallow: /

User-agent: Webclipping.com
Disallow: /
# Off-the-shelf crawler libraries and generic clients whose operators did not
# bother to identify themselves. Each one is excluded from the whole site,
# both by the exact string it sent and, where applicable, by its bare name.
User-agent: larbin_2.6.2 [email protected]
Disallow: /

User-agent: larbin_2.6.2
Disallow: /

User-agent: larbin_2.6.1 [email protected]
Disallow: /

User-agent: larbin_2.6.1
Disallow: /

User-agent: libwww-perl/5.5.3
Disallow: /

User-agent: libwww-perl/5.53
Disallow: /

User-agent: libwww-perl
Disallow: /

User-agent: Python-urllib/1.10
Disallow: /

User-agent: Python-urllib
Disallow: /

User-agent: Java1.3.0
Disallow: /

User-agent: Java1.4.0
Disallow: /

User-agent: Java
Disallow: /

User-agent: WinampMPEG/2.00 [email protected]
Disallow: /

User-agent: WinampMPEG
Disallow: /

User-agent: MSIE-5.13 [email protected]
Disallow: /

User-agent: Opera/6.01 [email protected]
Disallow: /

User-agent: Opera/6.01 [email protected]
Disallow: /

User-agent: Mozilla/5.0 [email protected]
Disallow: /
# The crawlers below identify themselves with a malformed User-Agent string,
# which suggests sloppy programming; they are excluded from the whole site.
# The HTTP specifications describe the expected format:
#   http://www.ietf.org/rfc/rfc1945.txt
#   http://www.ietf.org/rfc/rfc2068.txt

# An e-mail address is not a user-agent ID.
User-agent: [email protected]
Disallow: /

User-agent: Zeus 64087 Webster Pro V2.9 Win32
Disallow: /

# Missing whitespace before the parenthesized comment.
User-agent: NetResearchServer/2.3(loopimprovements.com/robot.html)
Disallow: /

User-agent: NetResearchServer
Disallow: /

# The ID must come first, followed by a space and then the comment.
User-agent: Openfind data gatherer, Openbot/3.0+([email protected];+http://www.openfind.com.tw/robot.html)
Disallow: /

User-agent: Openbot
Disallow: /

# A bare URL is not an ID either, even when the crawler belongs to IBM.
User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

# Almost well-formed, but not quite.
User-agent: NetMechanic V3.0
Disallow: /

User-agent: NetMechanic
Disallow: /

# The ID may not contain spaces: ID first, then a space, then a comment.
User-agent: Ariadne RPT-HTTPClient/0.3-3
Disallow: /

User-agent: Ariadne
Disallow: /
# Badly formed identification string.
User-agent: Mozilla/4.0 compatible ZyBorg/1.0 ([email protected]; http://www.WISEnutbot.com)
Disallow: /

User-agent: ZyBorg
Disallow: /

# Name and version should be joined by a slash, not a space.
User-agent: appie 1.1 (www.walhello.com)
Disallow: /

User-agent: appie
Disallow: /

# Malformed version string.
User-agent: search.ch V1.4.2 ([email protected]; http://www.search.ch)
Disallow: /

User-agent: search.ch
Disallow: /

# The ID may not contain spaces.
User-agent: PingALink Monitoring Services 1.0 (http://www.pingalink.com)
Disallow: /

User-agent: PingALink Monitoring Services
Disallow: /

User-agent: PingALink
Disallow: /

# Amiga client; excluded as well.
User-agent: AWeb-II v3.4SE/AMIGAOS-3.9
Disallow: /

User-agent: AWeb-II
Disallow: /
# Miscellaneous additional exclusions.
User-agent: Enterprise_Search/1.0 (http://www.innerprise.net/es-spider.asp)
Disallow: /

User-agent: BlitzBot
Disallow: /

# Spoofs the href to sites that carry annoying JavaScript and other junk.
User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .WONKZ)
Disallow: /

# Spammer bots, as listed at http://www.clockwatchers.com/robots_list.html
User-agent: EmailSiphon
Disallow: /

User-agent: EmailWolf
Disallow: /

User-agent: ExtractorPro
Disallow: /

User-agent: CherryPicker
Disallow: /

User-agent: NICErsPRO
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: EmailCollector
Disallow: /
# OK, everybody else: don't go here.