# robots.txt for www.influxdata.com
# See https://www.robotstxt.org/robotstxt.html for documentation
# =============================================================================
# CONTENT SIGNALS POLICY
# =============================================================================
# As a condition of accessing this website, you agree to abide by the following
# content signals:
#
# (a) If a content-signal = yes, you may collect content for the corresponding
# use.
# (b) If a content-signal = no, you may not collect content for the
# corresponding use.
# (c) If the website operator does not include a content signal for a
# corresponding use, the website operator neither grants nor restricts
# permission via content signal with respect to the corresponding use.
#
# The content signals and their meanings are:
#
# search: building a search index and providing search results (e.g., returning
# hyperlinks and short excerpts from your website's contents). Search does not
# include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
# augmented generation, grounding, or other real-time taking of content for
# generative AI search answers).
# ai-train: training or fine-tuning AI models.
#
# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.
# =============================================================================
# STANDARD CRAWLERS - ALLOWED
# =============================================================================
# NOTE: a crawler obeys ONLY the most specific group that names it and ignores
# the "User-agent: *" group entirely. Consecutive User-agent lines form a single
# group, so every crawler named in this section and in the AI section below
# shares one group with "*" and is therefore subject to the path exclusions
# listed under "SHARED RULES". Do not insert Allow/Disallow lines between these
# User-agent lines: that would split the group and exempt the crawlers above
# the inserted rule from the shared exclusions.
User-agent: Googlebot
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: QualifiedBot
# =============================================================================
# AI TRAINING BOTS - ALLOWED
# =============================================================================
# OpenAI
User-agent: GPTBot
User-agent: ChatGPT-User
# Common Crawl (used for AI training datasets)
User-agent: CCBot
# Google AI training (separate from search)
User-agent: Google-Extended
# Anthropic
User-agent: anthropic-ai
User-agent: Claude-Web
# Meta/Facebook AI
User-agent: FacebookBot
# Cohere AI
User-agent: cohere-ai
# Perplexity AI
User-agent: PerplexityBot
# Apple Intelligence
User-agent: Applebot-Extended
# Bytedance/TikTok
User-agent: Bytespider
# =============================================================================
# DEFAULT RULES FOR ALL OTHER CRAWLERS
# =============================================================================
User-agent: *
# =============================================================================
# SHARED RULES (apply to every crawler named above and to "*")
# =============================================================================
# Anything not disallowed below may be crawled. That is the robots.txt default,
# so no leading "Allow: /" is written: it is redundant, and parsers that apply
# rules in file order instead of by longest match would let it override every
# Disallow that follows.
# NOTE(review): the content-signals policy at the top of this file is declared,
# but no "Content-Signal:" directive appears in these rules; add one to this
# group if a signal is intended.
# Paths that should never be crawled (legacy/non-existent)
Disallow: /wp-admin/
Disallow: /wp-content/
Disallow: /wp-includes/
Disallow: /wp-json/
Disallow: /.env
Disallow: /.git/
Disallow: /admin/
Disallow: /administrator/
# Script/config files that don't exist on this Rails site
# ("*" matches any run of characters, "$" anchors the end of the URL)
Disallow: /*.php$
Disallow: /*.cgi$
Disallow: /*.py$
Disallow: /*.pl$
Disallow: /*.asp$
Disallow: /*.aspx$
Disallow: /*.jsp$
Disallow: /*.cfm$
Disallow: /*.sh$
Disallow: /*.bak$
Disallow: /*.sql$
Disallow: /*.ini$
Disallow: /*.conf$
Disallow: /*.config$
Disallow: /*.yml$
Disallow: /*.yaml$
Disallow: /*.log$
# Preview routes (Contentful preview, not public content)
# These are prefix matches, so everything beneath each route is excluded too.
Disallow: /webinar-preview
Disallow: /blog-preview
Disallow: /customer-preview
Disallow: /page-preview
Disallow: /influxace-preview
Disallow: /integration-preview
Disallow: /event-preview
Disallow: /channel-partner-preview
# Internal/utility routes
Disallow: /webhooks/
Disallow: /cont-webhook
# Prefix match: also covers /cap/ and everything beneath it, so no separate
# "/cap/" rule is needed.
Disallow: /cap
Disallow: /eml
# Presumably the Rails health-check endpoint (confirm against routes). Anchored
# with "$" so only the exact path is excluded; an unanchored "/up" is a prefix
# match and would also exclude any public URL that merely begins with "/up"
# (e.g. /upgrade, /updates).
Disallow: /up$
# =============================================================================
# AGGRESSIVE/PROBLEMATIC CRAWLERS - DISALLOWED
# =============================================================================
# One group: every crawler listed here is denied the entire site.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: PetalBot
User-agent: BLEXBot
User-agent: DataForSeoBot
User-agent: serpstatbot
User-agent: SeekportBot
Disallow: /
# =============================================================================
# SITEMAP
# =============================================================================
# Location of the XML sitemap, given as an absolute URL. This directive is not
# part of any User-agent group; it applies to every crawler that reads the file.
Sitemap: https://www.influxdata.com/sitemap.xml