# ============================================================
# robots.txt — yageogroup.com
# Last updated: [2026-04-13]
# Review quarterly for new crawler agents
# ============================================================
# ------------------------------------------------------------
# Default group: allow all legitimate crawlers
# Applies ONLY to crawlers that match no named group in this
# file. A crawler obeys the single most specific group that
# matches it; rules are not inherited from this group.
# ------------------------------------------------------------
User-agent: *
Allow: /
# Disallow URL parameter patterns that generate duplicate content
# (adjust parameter names to match actual query string keys)
# "?" matches the parameter when it is first in the query string;
# "&" matches it in any later position (e.g. /p?id=1&utm_source=x).
# The longer, more specific Disallow wins over "Allow: /".
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
# ------------------------------------------------------------
# Googlebot — explicit group
# NOT redundant with the * group: once Googlebot matches this
# group it ignores the * group entirely, so the duplicate-content
# parameter rules must be repeated here to take effect.
# "?" covers the parameter in first position, "&" in later ones.
# ------------------------------------------------------------
User-agent: Googlebot
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
# ------------------------------------------------------------
# Bingbot — set crawl delay to manage server load on launch
# (Google ignores Crawl-delay; this only applies to Bing)
# A named group replaces the * group for Bingbot, so the access
# rules are spelled out here alongside the delay; otherwise the
# duplicate-content parameter rules would not apply to Bing.
# "?" covers the parameter in first position, "&" in later ones.
# ------------------------------------------------------------
User-agent: Bingbot
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
Crawl-delay: 2
# ------------------------------------------------------------
# SEO audit crawlers
# SemrushBot allowed — used internally for site audits
# NOTE(review): Semrush documents a separate "SiteAuditBot"
# token for its Site Audit tool — confirm which token the
# internal audits actually use.
# ------------------------------------------------------------
User-agent: SemrushBot
Allow: /
# Other third-party SEO crawlers: blocked site-wide.
# Consecutive User-agent lines form one group and share the
# single Disallow rule that follows them.
# NOTE(review): Majestic's crawler identifies as MJ12bot; the
# MajesticSEO token is kept unchanged — confirm it is still needed.
User-agent: AhrefsBot
User-agent: MajesticSEO
User-agent: DotBot
User-agent: MJ12bot
Disallow: /
# ------------------------------------------------------------
# AI Retrieval Crawlers — allow for AI search answer visibility
# (these power real-time answers and indexing for AI search tools)
# All agents listed below form ONE group sharing the single
# "Allow: /" at the end; comment lines between User-agent lines
# do not split the group.
# NOTE(review): a named group replaces the * group, so the
# parameter Disallow rules of the * group do not apply to these
# agents — confirm that unrestricted access is intended.
# ------------------------------------------------------------
# OpenAI
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# Anthropic (retrieval only — ClaudeBot is training, see below)
User-agent: Claude-Web
User-agent: Claude-SearchBot
User-agent: Claude-User
# Perplexity
User-agent: PerplexityBot
# DuckDuckGo AI
User-agent: DuckAssistBot
# You.com
User-agent: YouBot
# Brave Search
User-agent: Bravebot
# Apple (Siri, Spotlight, Apple Intelligence — separate from Applebot-Extended training bot)
User-agent: Applebot
# Mistral / Le Chat
User-agent: MistralAI-User
# Meta AI search (distinct from meta-externalagent training bot)
User-agent: meta-webindexer
Allow: /
# ------------------------------------------------------------
# Chinese Search Engine Crawlers — allow for Baidu, Sogou,
# 360 Search, and Shenma indexation
# Each named group replaces the * group for that crawler, so the
# duplicate-content parameter rules are repeated in every group.
# "?" covers the parameter in first position, "&" in later ones.
# One agent per group is kept deliberately for parser compatibility.
# NOTE(review): wildcard support varies by engine — a crawler that
# does not expand "*" will simply never match these Disallow lines.
# ------------------------------------------------------------
User-agent: Baiduspider
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
User-agent: Sogou web spider
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
User-agent: 360Spider
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
# Shenma (UCWeb/Alibaba mobile search)
User-agent: YisouSpider
Allow: /
Disallow: /*?search=
Disallow: /*&search=
Disallow: /*?utm_
Disallow: /*&utm_
# ------------------------------------------------------------
# Chinese AI Retrieval Crawlers — allow for visibility in
# Kimi, Tongyi Qwen, and other Chinese AI-powered search
# Both agents share one group with a single "Allow: /".
# NOTE(review): a named group replaces the * group, so the
# parameter Disallow rules of the * group do not apply here —
# confirm that unrestricted access is intended.
# ------------------------------------------------------------
User-agent: MoonshotBot
User-agent: Qwen
Allow: /