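#
# Crawl rules for this site. The default (User-agent: *) group below
# applies to any robot that has no group of its own; the former
# whitelist of known and registered robots has been removed.
#
# Modified by Daniel - took out all whitelisted bots; we can
# add blacklists here and in the web server if needed.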
# Default group: applies to every crawler that does not have a more
# specific User-agent group of its own in this file.
# Do not insert blank lines inside the group; some older parsers treat a
# blank line as the end of the record. Keep specific rules above broader
# rules for the same path, because some parsers apply the first match
# instead of the longest match.
User-agent: *
Disallow: /cgi-bin/
Disallow: /systems/platforms/linux/software/package/
# Page fragments included by the CMS (currently not blocked).
#Disallow: /webapps/page_body/
#Disallow: /webapps/page_title/
# Staging server.
Disallow: /webapps-dev/
# Mailing-list archives: everything under /pipermail/ is blocked except
# the lists explicitly allowed below.
# History: someone complained that their email to cvc-users was picked
# up by Google (because their signature line had their phone number).
# It is a reasonable expectation that such email not be picked up by
# Google, since people very often use a signature line that includes
# such information. So I agree with the person who complained and
# called it "Bad Practice".
# For most lists crawling is not allowed, so these exceptions must
# have been requested by the list owners.
# So if they complain, I guess we can explain then?
# But, for now, I commented out all 4 Allows below.
# -aph- 11/29/2018
# Changes reverted. smt-lib, smt-comp, and fom lists are crawlable again.
# -robb- 07/25/2019
# The Allow lines stay above "Disallow: /pipermail/" so that first-match
# parsers honor them as well.
Allow: /pipermail/smt-lib
Allow: /pipermail/smt-comp
Allow: /pipermail/fom
Disallow: /pipermail/
# Added 2018/11/27 by NF.
# Do not crawl the computing.nyu.edu test site.
Disallow: /computing/
# Added 2015/11/25 by NF.
# Do not crawl the Django admin site.
Disallow: /dynamic/admin/
# Added 2015/11/25 by NF.
Disallow: /archive201511/
# Added 2015/06/09.
# Google Search Appliance seems to abuse the classroom calendar.
# This rule is deliberately listed before "Allow: /webapps/": a parser
# that applies the first matching rule would otherwise stop at the Allow
# and never reach this Disallow.
Disallow: /webapps/classrooms/
# Added 2015/11/24 by NF.
# CS website has moved from /web/ to /home/.
Allow: /web/
Allow: /webapps/
# Added 2016/03/03 by NF. See Ticket#2016030210001058.
Disallow: /cs/review/
# The following should eliminate many of the 404s that result from
# crawlers following strings found in JavaScript, such as
# var AUTH_TOKEN = 'm0IBKGTI83RXdNSm25OtcWWCyfDE6SLQWkkBosLVvmA=';
# The pattern matches any URL path that ends in "=".
Disallow: /*=$
# AhrefsBot: throttle to one request every 2 seconds.
# A crawler obeys only the most specific group that matches its user
# agent and ignores the "User-agent: *" group once it has a group of its
# own. A group containing nothing but Crawl-Delay would therefore leave
# AhrefsBot free to crawl every path, so the path rules of the default
# group are repeated here. Keep the two groups in sync.
User-agent: AhrefsBot
Crawl-Delay: 2
Disallow: /cgi-bin/
Disallow: /systems/platforms/linux/software/package/
Disallow: /webapps-dev/
Allow: /pipermail/smt-lib
Allow: /pipermail/smt-comp
Allow: /pipermail/fom
Disallow: /pipermail/
Disallow: /computing/
Disallow: /dynamic/admin/
Disallow: /archive201511/
# Specific Disallow listed before the broader "Allow: /webapps/" so that
# first-match parsers still apply it.
Disallow: /webapps/classrooms/
Allow: /web/
Allow: /webapps/
Disallow: /cs/review/
Disallow: /*=$