# As a condition of accessing this website, you agree to abide by the following
# content signals:
# (a) If a Content-Signal = yes, you may collect content for the corresponding
# use.
# (b) If a Content-Signal = no, you may not collect content for the
# corresponding use.
# (c) If the website operator does not include a Content-Signal for a
# corresponding use, the website operator neither grants nor restricts
# permission via Content-Signal with respect to the corresponding use.
# The content signals and their meanings are:
# search: building a search index and providing search results (e.g., returning
# hyperlinks and short excerpts from your website's contents). Search does not
# include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
# augmented generation, grounding, or other real-time taking of content for
# generative AI search answers).
# ai-train: training or fine-tuning AI models.
# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.
# BEGIN Cloudflare Managed content
# NOTE(review): these groups sit between the "Cloudflare Managed" BEGIN/END
# markers, so they are presumably generated rather than hand-written --
# confirm before editing them directly.
#
# Fallback group: applies to any crawler that matches none of the named
# groups in this file. It signals search=yes and ai-train=no and sets no
# ai-input signal. A second "User-agent: *" group later in this file adds
# ai-input=no; how a consumer reconciles two Content-Signal lines for the
# same agent is not shown here -- TODO confirm.
User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /
# Every crawler named below is denied the whole site (Disallow: /).
User-agent: Amazonbot
Disallow: /
User-agent: Applebot-Extended
Disallow: /
User-agent: Bytespider
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: CloudflareBrowserRenderingCrawler
Disallow: /
User-agent: Google-Extended
Disallow: /
User-agent: GPTBot
Disallow: /
User-agent: meta-externalagent
Disallow: /
# END Cloudflare Managed Content
# .__________________________.
# | .___________________. |==|
# | | ................. | | |
# | | ::[ Dear robot ]: | | |
# | | ::::[ be nice ]:: | | |
# | | ::::::::::::::::: | | |
# | | ::::::::::::::::: | | |
# | | ::::::::::::::::: | | |
# | | ::::::::::::::::: | | ,|
# | !___________________! |(c|
# !_______________________!__!
# / \
# / [][][][][][][][][][][][][] \
# / [][][][][][][][][][][][][][] \
#( [][][][][____________][][][][] )
# \ ------------------------------ /
# \______________________________/
#
# Last updated: 2025-12-12 by Ibrahim Alhas.
# --------------------------------------------------------------------
# Cloudflare / Content Signals policy (human-readable explanation)
# --------------------------------------------------------------------
# As a condition of accessing this website, you agree to abide by the
# following content signals:
#
# (a) If a content-signal = yes, you may collect content for the
# corresponding use.
# (b) If a content-signal = no, you may not collect content for the
# corresponding use.
# (c) If the website operator does not include a content signal for a
# corresponding use, the website operator neither grants nor
# restricts permission via content signal with respect to that use.
#
# The content signals and their meanings are:
#
# search: building a search index and providing search results
# (e.g., returning hyperlinks and short excerpts from the
# website's contents). Search does not include providing
# AI-generated search summaries.
#
# ai-input: inputting content into one or more AI models (e.g.,
# retrieval augmented generation, grounding, or other
# real-time use of content for generative AI search answers).
#
# ai-train: training or fine-tuning AI models.
#
# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS
# RESERVATIONS OF RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION
# DIRECTIVE 2019/790 ON COPYRIGHT AND RELATED RIGHTS IN THE DIGITAL
# SINGLE MARKET.
# --------------------------------------------------------------------
# 1. Default rules – allow normal crawling & search indexing
# --------------------------------------------------------------------
# We allow standard search indexing, but do NOT permit use of our
# content for AI input or AI training.
# Fallback for every crawler without a more specific group: the whole
# site may be crawled, at most one request every 30 seconds.
User-agent: *
Content-Signal: search=yes, ai-input=no, ai-train=no
Crawl-delay: 30
Allow: /
# Google and Bing web search get the same policy, restated explicitly in
# one shared group. Google's AI training is controlled separately via
# Google-Extended below.
# NOTE(review): Google documents Crawl-delay as unsupported, so the delay
# here likely only affects Bingbot -- confirm.
User-agent: Googlebot
User-agent: Bingbot
Content-Signal: search=yes, ai-input=no, ai-train=no
Crawl-delay: 30
Allow: /
# BLEXBot: aggressive generic crawler, denied the entire site.
User-agent: BLEXBot
Disallow: /
# --------------------------------------------------------------------
# 2. AI / LLM-specific crawlers – blocked
# --------------------------------------------------------------------
# These user-agents are commonly associated with AI training or AI
# search services. We do not permit crawling or reuse of our content
# by these bots.
# Each crawler below gets its own group with a site-wide "Disallow: /".
# Entries are grouped by operator. Crawlers that also appear in the
# Cloudflare managed list at the top of this file are restated here so
# this hand-maintained list stays complete on its own.
# OpenAI
User-agent: GPTBot
Disallow: /
User-agent: OAI-SearchBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
# Anthropic (Claude)
User-agent: ClaudeBot
Disallow: /
User-agent: Claude-Web
Disallow: /
User-agent: Claude-User
Disallow: /
User-agent: Claude-SearchBot
Disallow: /
# Perplexity
User-agent: PerplexityBot
Disallow: /
User-agent: Perplexity-User
Disallow: /
# Google AI training (separate from standard search indexing)
User-agent: Google-Extended
Disallow: /
# CommonCrawl (widely used in AI training corpora)
User-agent: CCBot
Disallow: /
# Apple AI training
User-agent: Applebot-Extended
Disallow: /
# Meta / Facebook
User-agent: FacebookBot
Disallow: /
# meta-externalagent is denied in the Cloudflare managed list; restated
# here so Meta's crawler stays blocked even if that list is removed.
User-agent: meta-externalagent
Disallow: /
# ByteDance
User-agent: Bytespider
Disallow: /
# Amazon
User-agent: Amazonbot
Disallow: /
# --------------------------------------------------------------------
# 3. Notes
# --------------------------------------------------------------------
# - Standard search engines that respect robots.txt are allowed to
# crawl: Googlebot and Bingbot match their own explicit groups in
# section 1, and all other crawlers fall back to the default (*) group.
# - Content-Signal values indicate that traditional search indexing
# is permitted, but AI input and AI training uses are not.
# - robots.txt is an advisory mechanism: compliant crawlers will
# respect it; hostile or disguised scrapers may ignore it and
# must be handled via other measures (e.g. Cloudflare Bot
# Management, WAF, rate limiting).
# - Additional AI/LLM crawlers can be added to the blocked list
# as the ecosystem evolves.