# CrustyCraft Pizza - robots.txt
# Site: https://crustycraftpizza.ca
#
# How this file is read:
#   * One directive per line; "#" starts a comment that runs to end of line.
#   * A crawler obeys ONLY the most specific User-agent group that matches it.
#     Rules are never merged across groups, so every named group below
#     repeats the default rule set in full. Keep the copies in sync.
#   * Specific rules come before the catch-all "Allow: /" so that parsers
#     which stop at the first match and parsers which pick the longest match
#     reach the same answer.

# ---------------------------------------------------------------
# Default rules - apply to every crawler without its own group.
# Googlebot, Bingbot, Yandex and DuckDuckBot deliberately have no
# group of their own, so they follow these rules.
# ---------------------------------------------------------------
User-agent: *
Disallow: /components/
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
# NOTE(review): if /static/ serves the CSS/JS needed to render pages,
# blocking it can hurt how search engines render the site - confirm.
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
# Legal pages are kept out of all crawls.
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
Allow: /

# ---------------------------------------------------------------
# Keeping /llms.txt out of search results
# "Noindex:" is not a robots.txt directive (it is not in RFC 9309 and
# Google ignores it), so it cannot be expressed in this file. Serve
# /llms.txt with the HTTP response header
#     X-Robots-Tag: noindex
# instead. The file must stay crawlable (as it is here) so that the
# header can be seen.
# ---------------------------------------------------------------

# ---------------------------------------------------------------
# AI / LLM crawlers - default rules plus an explicit allow for llms.txt
# ---------------------------------------------------------------
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: CCBot
User-agent: anthropic-ai
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: cohere-ai
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: Applebot-Extended
User-agent: meta-externalagent
Allow: /llms.txt
Disallow: /components/
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
Allow: /

# ---------------------------------------------------------------
# Google Ads bots - full access (unchanged from the previous rules)
# ---------------------------------------------------------------
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google-Mobile-Apps
Allow: /

# ---------------------------------------------------------------
# Heavy / lower-priority bots - default rules plus a crawl delay.
# Crawl-delay is a non-standard extension; crawlers that do not
# support it simply ignore the line.
# ---------------------------------------------------------------
User-agent: Baiduspider
User-agent: Sogou
Crawl-delay: 10
Disallow: /components/
Disallow: /config/
Disallow: /search/
Disallow: /account/
Disallow: /api/
Disallow: /static/
Disallow: /*?*author=*
Disallow: /*?*tag=*
Disallow: /*?*month=*
Disallow: /*?*view=*
Disallow: /*?*format=*
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html
Allow: /

# ---------------------------------------------------------------
# Sitemap location
# ---------------------------------------------------------------
Sitemap: https://crustycraftpizza.ca/sitemap.xml

# LLMs.txt location for AI agents:
# https://crustycraftpizza.ca/llms.txt