# VerifyTheBrand robots policy
# Goal: allow search-engine, social-unfurl, and AI answer-time RETRIEVAL
# crawlers (citations in AI answers are a distribution channel — long-tail
# operator queries are exactly what answer engines retrieve for); block AI
# TRAINING scrapers (we don't want our analyses harvested into model training
# corpora); and block /api/* and /auth/* (no useful index content there),
# along with /scanning, /confirm, and /shares.
#
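# The training/retrieval line, by vendor. "Allowed" bots have no group of
# their own: they fall through to the `*` group and obey its Disallow rules.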
#   OpenAI:     GPTBot = training (blocked) · OAI-SearchBot + ChatGPT-User =
#               search-index/answer-time retrieval (allowed)
#   Anthropic:  ClaudeBot = training (blocked) · Claude-User +
#               Claude-SearchBot = retrieval (allowed)
#   Perplexity: PerplexityBot = answer-engine index (allowed)
#   Meta:       Meta-ExternalAgent = training (blocked) · facebookexternalhit
#               = OG unfurl (allowed)

# Default group: used by every crawler that has no dedicated group in this
# file. The Disallow rules come BEFORE the catch-all Allow on purpose.
# RFC 9309 parsers choose the longest matching path, so order is irrelevant
# to them, but first-match parsers (e.g. Python's urllib.robotparser) stop at
# the first rule that matches and would let a leading "Allow: /" override
# every Disallow after it.
User-agent: *
Disallow: /api/
Disallow: /auth/
# No trailing slash: these are prefix matches, so they also cover anything
# beneath or beginning with the path (e.g. /shares/<id>).
Disallow: /scanning
Disallow: /confirm
Disallow: /shares
Allow: /

# --- AI training scrapers — explicit deny ---
# Our reports are the product. We don't want them ingested into competing
# LLMs' training corpora. Enforcement is split between this file and the
# edge: compliant vendors (OpenAI, Anthropic, Common Crawl, ...) honor
# robots.txt, so this file is the enforcement for them; the robots-ignoring
# ByteDance crawlers (Bytespider, TikTok Spider) are additionally blocked
# per-bot at the Cloudflare edge (AI Crawl Control), and unverified scrapers
# get Bot Fight Mode challenges.

# Full-site deny list: one group per user-agent token, sorted
# case-insensitively by token so additions are easy to audit. Group order has
# no effect on matching: a crawler uses the group that names it and falls back
# to the `*` group only when none does.

User-agent: Amazonbot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Applebot-Extended
Disallow: /

# ByteDance; also blocked at the Cloudflare edge because it ignores this file.
User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Claude-Web
Disallow: /

# Anthropic training crawler.
User-agent: ClaudeBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: DiffBot
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Google-Extended
Disallow: /

# OpenAI training crawler.
User-agent: GPTBot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

# Meta training crawler.
User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Omgilibot
Disallow: /

# Absolute URL of the XML sitemap. This directive stands outside the
# User-agent groups above and is not tied to any one of them.
Sitemap: https://verifythebrand.com/sitemap.xml