# stantscherenkow.com - robots.txt
# Last updated: 2026-05-03
#
# Parsing notes for maintainers:
#   * robots.txt is line-oriented: every record (User-agent, Allow, Disallow,
#     Sitemap, ...) must sit on its own line. Anything after a "#" on a line
#     is a comment.
#   * A crawler obeys ONLY the most specific group that matches its
#     user-agent; it does not additionally inherit the "*" group. Every named
#     group below therefore repeats the transactional-page exclusions.
#   * Disallow lines are listed before "Allow: /" so that parsers using
#     first-match semantics reach the same result as longest-match parsers.

# ==============================================
# Default policy - all crawlers welcome, except transactional pages
# ==============================================
User-agent: *
Disallow: /thank-you-apply
Disallow: /thank-you-log
Disallow: /404
Disallow: /knowledge/_blog-template.html
Allow: /
Content-Signal: ai-train=yes, search=yes, ai-input=yes

# ==============================================
# AI / LLM training and retrieval crawlers - explicit allow
# (one shared group: consecutive User-agent lines share the rules that follow)
# ==============================================
# OpenAI
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# Anthropic
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Claude-SearchBot
User-agent: anthropic-ai
# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
# Google Gemini / Vertex AI training
User-agent: Google-Extended
# Apple Intelligence
User-agent: Applebot
User-agent: Applebot-Extended
# Common Crawl (used by many LLM training pipelines)
User-agent: CCBot
# Meta Llama
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
# Amazon / Alexa
User-agent: Amazonbot
# ByteDance / Doubao
User-agent: Bytespider
# TikTok / ByteDance
User-agent: TikTokSpider
# Cohere
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
# Mistral
User-agent: MistralAI-User
# DeepSeek
User-agent: DeepSeekBot
# You.com
User-agent: YouBot
# Diffbot
User-agent: Diffbot
# Omgili (used in some LLM pipelines)
User-agent: omgili
User-agent: omgilibot
# Neeva
User-agent: NeevaBot
# Timpi
User-agent: TimpiBot
Disallow: /thank-you-apply
Disallow: /thank-you-log
Disallow: /404
Disallow: /knowledge/_blog-template.html
Allow: /
Content-Signal: ai-train=yes, search=yes, ai-input=yes

# ==============================================
# SEO / backlink crawlers - default exclusions plus crawl-delay
# ==============================================
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
Disallow: /thank-you-apply
Disallow: /thank-you-log
Disallow: /404
Disallow: /knowledge/_blog-template.html
Crawl-delay: 10

# ==============================================
# Sitemaps
# ==============================================
Sitemap: https://stantscherenkow.com/sitemap.xml

# AI-native context layer (informational comments only; not parsed as records)
# LLMs-txt: https://stantscherenkow.com/llms.txt
# AI-txt: https://stantscherenkow.com/ai.txt
# Voice-AI: https://stantscherenkow.com/voice-ai.txt
# AI access and citation: https://stantscherenkow.com/ai-access