# As a condition of accessing this website, you agree to abide by the following
# content signals:
# (a) If a Content-Signal = yes, you may collect content for the corresponding
# use.
# (b) If a Content-Signal = no, you may not collect content for the
# corresponding use.
# (c) If the website operator does not include a Content-Signal for a
# corresponding use, the website operator neither grants nor restricts
# permission via Content-Signal with respect to the corresponding use.
# The content signals and their meanings are:
# search: building a search index and providing search results (e.g., returning
# hyperlinks and short excerpts from your website's contents). Search does not
# include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
# augmented generation, grounding, or other real-time taking of content for
# generative AI search answers).
# ai-train: training or fine-tuning AI models.
# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.
# BEGIN Cloudflare Managed content
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /
User-agent: Amazonbot
Disallow: /
User-agent: Applebot-Extended
Disallow: /
User-agent: Bytespider
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: CloudflareBrowserRenderingCrawler
Disallow: /
User-agent: Google-Extended
Disallow: /
User-agent: GPTBot
Disallow: /
User-agent: meta-externalagent
Disallow: /
# END Cloudflare Managed Content
# Google Search crawlers (web and image) share one group with unrestricted
# access: an empty Disallow value excludes nothing, so every path is crawlable.
User-agent: Googlebot
User-agent: Googlebot-Image
Disallow:
# Allow, but slow down, other well-behaved search crawlers (Bing, Yahoo,
# DuckDuckGo, Apple, Baidu). Each group pairs "Crawl-delay: 15" with an empty
# Disallow, which excludes nothing (every path may be crawled).
# NOTE(review): Crawl-delay is a non-standard extension (not part of RFC 9309);
# a crawler that does not implement it simply ignores the line — confirm
# support per bot before relying on the throttle.
# NOTE(review): a crawler that matches its own named group does not also apply
# the "User-agent: *" rules, so the /wp-admin/, /wp-includes/ and /cgi-bin/
# exclusions in the default group do not reach these bots — confirm that is
# intended.
User-agent: Bingbot
Crawl-delay: 15
Disallow:
User-agent: Slurp
Crawl-delay: 15
Disallow:
User-agent: DuckDuckBot
Crawl-delay: 15
Disallow:
User-agent: Applebot
Crawl-delay: 15
Disallow:
User-agent: Baiduspider
Crawl-delay: 15
Disallow:
# Block specific unwanted bots from the entire site ("Disallow: /").
# robots.txt is advisory: these rules only stop crawlers that choose to honor
# them; hard blocking has to be enforced at the server or firewall.
User-agent: ChatGPT-User
Disallow: /
User-agent: AhrefsBot
Disallow: /
User-agent: CriteoBot
Disallow: /
User-agent: imageSpider
Disallow: /
User-agent: proximic
Disallow: /
User-agent: AudigentAdBot
Disallow: /
User-agent: YandexBot
Disallow: /
User-agent: YandexRenderResourcesBot
Disallow: /
# NOTE(review): this value contains spaces and a dot, which are outside the
# product-token characters RFC 9309 defines (letters, "-", "_"); a strict
# parser may not match it as written — confirm the token this crawler uses.
User-agent: Centro Ads.txt Crawler
Disallow: /
User-agent: ias-ie
Disallow: /
# Block additional well-known crawlers, plus generic HTTP clients and scanners
# (optional). Every group below disallows the entire site.
User-agent: SemrushBot
Disallow: /
User-agent: MJ12bot
Disallow: /
User-agent: DotBot
Disallow: /
User-agent: BLEXBot
Disallow: /
User-agent: PetalBot
Disallow: /
User-agent: DataForSeoBot
Disallow: /
User-agent: magpie-crawler
Disallow: /
# NOTE(review): the dot in this value is outside the RFC 9309 product-token
# characters (letters, "-", "_") — confirm the token this crawler matches on.
User-agent: Mail.RU_Bot
Disallow: /
User-agent: SeekportBot
Disallow: /
User-agent: MegaIndex
Disallow: /
User-agent: Exabot
Disallow: /
User-agent: SISTRIX
Disallow: /
User-agent: SeznamBot
Disallow: /
User-agent: ZoominfoBot
Disallow: /
# NOTE(review): if this is meant to target Google's ads crawler, its documented
# token is "AdsBot-Google"; a parser that requires an exact token match would
# not apply an "AdsBot" group to it — confirm the intended target.
User-agent: AdsBot
Disallow: /
User-agent: MauiBot
Disallow: /
User-agent: MojeekBot
Disallow: /
# NOTE(review): the remaining names look like HTTP libraries, command-line
# clients and network scanners rather than crawlers; such tools commonly never
# fetch robots.txt, so these groups may have no effect — block them at the
# server or firewall if that is required.
User-agent: python
Disallow: /
User-agent: curl
Disallow: /
User-agent: wget
Disallow: /
User-agent: libwww-perl
Disallow: /
User-agent: masscan
Disallow: /
User-agent: zgrab
Disallow: /
# Default for every crawler that has no dedicated group in this file: the site
# is crawlable except for the WordPress/system directories listed here.
# This is a crawl hint, not protection — robots.txt is advisory, so access to
# these paths must still be restricted on the server itself.
# A second "User-agent: *" group appears earlier in this file; parsers that
# follow RFC 9309 combine the rules of both groups.
User-agent: *
# Front-end theme/plugin features call admin-ajax.php, so keep that single
# endpoint crawlable (WordPress core's default robots.txt does the same).
# It is listed before the broader Disallow so that both longest-match and
# first-match parsers resolve it as allowed.
Allow: /wp-admin/admin-ajax.php
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /cgi-bin/