{
  "version": "2.1.0",
  "last_updated": "2026-03-23",
  "author": "Yohan Ziri",
  "changelog": "Added 'intention' field to classify bots: training, inference, search, scraping",
  "bots": {
    "ai_bots": [
      {"pattern": "GPTBot", "name": "OpenAI GPTBot", "family": "OpenAI", "intention": "training"},
      {"pattern": "ChatGPT-User", "name": "OpenAI ChatGPT User", "family": "OpenAI", "intention": "inference"},
      {"pattern": "ChatGPT Agent", "name": "OpenAI ChatGPT Agent", "family": "OpenAI", "intention": "inference"},
      {"pattern": "OAI-SearchBot", "name": "OpenAI SearchBot", "family": "OpenAI", "intention": "search"},
      {"pattern": "Operator", "name": "OpenAI Operator", "family": "OpenAI", "intention": "inference"},
      {"pattern": "OpenAI", "name": "OpenAI (Generic)", "family": "OpenAI", "intention": "training"},
      {"pattern": "ClaudeBot", "name": "Anthropic ClaudeBot", "family": "Anthropic", "intention": "training"},
      {"pattern": "Claude-Web", "name": "Anthropic Claude Web", "family": "Anthropic", "intention": "inference"},
      {"pattern": "Claude-User", "name": "Anthropic Claude User", "family": "Anthropic", "intention": "inference"},
      {"pattern": "Claude-SearchBot", "name": "Anthropic Claude SearchBot", "family": "Anthropic", "intention": "search"},
      {"pattern": "anthropic-ai", "name": "Anthropic AI", "family": "Anthropic", "intention": "training"},
      {"pattern": "Google-Extended", "name": "Google AI Extended", "family": "Google", "intention": "training"},
      {"pattern": "GoogleAgent-Mariner", "name": "Google Project Mariner", "family": "Google", "intention": "inference"},
      {"pattern": "GoogleOther", "name": "Google Other", "family": "Google", "intention": "scraping"},
      {"pattern": "GoogleOther-Image", "name": "Google Other Image", "family": "Google", "intention": "scraping"},
      {"pattern": "GoogleOther-Video", "name": "Google Other Video", "family": "Google", "intention": "scraping"},
      {"pattern": "Gemini-Deep-Research", "name": "Google Gemini Deep Research", "family": "Google", "intention": "inference"},
      {"pattern": "Google-CloudVertexBot", "name": "Google Vertex AI", "family": "Google", "intention": "training"},
      {"pattern": "Google-NotebookLM", "name": "Google NotebookLM", "family": "Google", "intention": "inference"},
      {"pattern": "NotebookLM", "name": "Google NotebookLM", "family": "Google", "intention": "inference"},
      {"pattern": "Google-Firebase", "name": "Google Firebase AI", "family": "Google", "intention": "training"},
      {"pattern": "FacebookBot", "name": "Meta Facebook Bot", "family": "Meta", "intention": "scraping"},
      {"pattern": "Meta-ExternalAgent", "name": "Meta AI External Agent", "family": "Meta", "intention": "scraping"},
      {"pattern": "meta-externalagent", "name": "Meta AI External Agent", "family": "Meta", "intention": "scraping"},
      {"pattern": "Meta-ExternalFetcher", "name": "Meta External Fetcher", "family": "Meta", "intention": "scraping"},
      {"pattern": "meta-externalfetcher", "name": "Meta External Fetcher", "family": "Meta", "intention": "scraping"},
      {"pattern": "Meta-WebIndexer", "name": "Meta AI Web Indexer", "family": "Meta", "intention": "training"},
      {"pattern": "meta-webindexer", "name": "Meta AI Web Indexer", "family": "Meta", "intention": "training"},
      {"pattern": "facebookexternalhit", "name": "Meta External Hit", "family": "Meta", "intention": "scraping"},
      {"pattern": "BingPreview", "name": "Microsoft Bing AI Preview", "family": "Microsoft", "intention": "search"},
      {"pattern": "AzureAI-SearchBot", "name": "Microsoft Azure AI Search Bot", "family": "Microsoft", "intention": "search"},
      {"pattern": "PerplexityBot", "name": "Perplexity Bot", "family": "Perplexity", "intention": "search"},
      {"pattern": "Perplexity-User", "name": "Perplexity User Agent", "family": "Perplexity", "intention": "inference"},
      {"pattern": "xAI-Bot", "name": "xAI Grok Bot", "family": "xAI", "intention": "training"},
      {"pattern": "DuckAssistBot", "name": "DuckDuckGo AI Assistant", "family": "DuckDuckGo", "intention": "search"},
      {"pattern": "MistralAI-User", "name": "Mistral AI User", "family": "Mistral", "intention": "inference"},
      {"pattern": "CCBot", "name": "Common Crawl", "family": "Data", "intention": "training"},
      {"pattern": "Bytespider", "name": "ByteDance AI", "family": "ByteDance", "intention": "training"},
      {"pattern": "TikTokSpider", "name": "TikTok AI Spider", "family": "ByteDance", "intention": "training"},
      {"pattern": "Amazonbot", "name": "Amazon Alexa Bot", "family": "Amazon", "intention": "training"},
      {"pattern": "AmazonBuyForMe", "name": "Amazon Buy For Me", "family": "Amazon", "intention": "inference"},
      {"pattern": "amazon-kendra", "name": "Amazon Kendra", "family": "Amazon", "intention": "search"},
      {"pattern": "bedrockbot", "name": "Amazon Bedrock Bot", "family": "Amazon", "intention": "training"},
      {"pattern": "Nova Act", "name": "Amazon Nova Act", "family": "Amazon", "intention": "inference"},
      {"pattern": "NovaAct", "name": "Amazon Nova Act", "family": "Amazon", "intention": "inference"},
      {"pattern": "Amzn-SearchBot", "name": "Amazon Search Bot", "family": "Amazon", "intention": "search"},
      {"pattern": "Amzn-User", "name": "Amazon AI User", "family": "Amazon", "intention": "inference"},
      {"pattern": "Applebot-Extended", "name": "Apple AI Training Bot", "family": "Apple", "intention": "training"},
      {"pattern": "PanguBot", "name": "Huawei PanguBot", "family": "Huawei", "intention": "training"},
      {"pattern": "YandexAdditional", "name": "Yandex AI Additional", "family": "Yandex", "intention": "training"},
      {"pattern": "YandexAdditionalBot", "name": "Yandex AI Bot", "family": "Yandex", "intention": "training"},
      {"pattern": "Alibaba-Crawler", "name": "Alibaba AI Crawler", "family": "Alibaba", "intention": "training"},
      {"pattern": "Alibaba-AI", "name": "Alibaba AI", "family": "Alibaba", "intention": "training"},
      {"pattern": "character-ai", "name": "Character.AI Bot", "family": "Character.AI", "intention": "inference"},
      {"pattern": "StabilityBot", "name": "Stability AI Bot", "family": "Stability AI", "intention": "training"},
      {"pattern": "runway-ml", "name": "Runway ML Bot", "family": "Runway", "intention": "training"},
      {"pattern": "poe-chat-bot", "name": "Poe Chat Bot", "family": "Poe", "intention": "inference"},
      {"pattern": "jasper-ai", "name": "Jasper AI Bot", "family": "AI Writing", "intention": "inference"},
      {"pattern": "copy-ai-bot", "name": "Copy.ai Bot", "family": "AI Writing", "intention": "inference"},
      {"pattern": "writesonic-bot", "name": "Writesonic Bot", "family": "AI Writing", "intention": "inference"},
      {"pattern": "NeevaBot", "name": "Neeva AI Search", "family": "Search AI", "intention": "search"},
      {"pattern": "KomoBot", "name": "Komo AI Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "waldo-crawler", "name": "Waldo Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "archive.org_bot", "name": "Internet Archive Bot", "family": "Data", "intention": "training"},
      {"pattern": "ia_archiver", "name": "Internet Archive", "family": "Data", "intention": "training"},
      {"pattern": "iAskBot", "name": "iAsk.ai Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "iaskspider", "name": "iAsk.ai Spider", "family": "Search AI", "intention": "search"},
      {"pattern": "PhindBot", "name": "Phind AI Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "BraveBot", "name": "Brave Search AI", "family": "Search AI", "intention": "search"},
      {"pattern": "TimpiBot", "name": "Timpi AI Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "YouBot", "name": "You.com AI Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "Andibot", "name": "Andi Search AI", "family": "Search AI", "intention": "search"},
      {"pattern": "AI2Bot", "name": "Allen Institute AI", "family": "Data", "intention": "training"},
      {"pattern": "AI2Bot-DeepResearchEval", "name": "Allen Institute Deep Research", "family": "Data", "intention": "training"},
      {"pattern": "Ai2Bot-Dolma", "name": "Allen Institute Dolma", "family": "Data", "intention": "training"},
      {"pattern": "DeepseekBot", "name": "DeepSeek AI", "family": "Data", "intention": "training"},
      {"pattern": "ChatGLM-Spider", "name": "ChatGLM AI Spider", "family": "Data", "intention": "training"},
      {"pattern": "LAIONDownloader", "name": "LAION Downloader", "family": "Data", "intention": "training"},
      {"pattern": "laion-huggingface-processor", "name": "LAION HuggingFace", "family": "Data", "intention": "training"},
      {"pattern": "Cohere-training-data-crawler", "name": "Cohere Training Crawler", "family": "Cohere", "intention": "training"},
      {"pattern": "cohere-ai", "name": "Cohere AI", "family": "Cohere", "intention": "training"},
      {"pattern": "img2dataset", "name": "img2dataset", "family": "Data", "intention": "training"},
      {"pattern": "Crawl4AI", "name": "Crawl4AI", "family": "Data", "intention": "scraping"},
      {"pattern": "FriendlyCrawler", "name": "FriendlyCrawler", "family": "Data", "intention": "scraping"},
      {"pattern": "ISSCyberRiskCrawler", "name": "ISSCyberRiskCrawler", "family": "Data", "intention": "scraping"},
      {"pattern": "KunatoCrawler", "name": "KunatoCrawler", "family": "Data", "intention": "scraping"},
      {"pattern": "Diffbot", "name": "Diffbot", "family": "Tools", "intention": "scraping"},
      {"pattern": "QuillBot", "name": "QuillBot AI", "family": "Tools", "intention": "inference"},
      {"pattern": "BuddyBot", "name": "BuddyBot Learning", "family": "Tools", "intention": "training"},
      {"pattern": "KlaviyoAIBot", "name": "Klaviyo AI Bot", "family": "Tools", "intention": "scraping"},
      {"pattern": "Devin", "name": "Devin AI Agent", "family": "Tools", "intention": "inference"},
      {"pattern": "TwinAgent", "name": "TwinAgent AI", "family": "Tools", "intention": "inference"},
      {"pattern": "Linguee Bot", "name": "Linguee Bot", "family": "Tools", "intention": "training"},
      {"pattern": "Grammarly", "name": "Grammarly AI", "family": "Tools", "intention": "inference"},
      {"pattern": "Notion-Bot", "name": "Notion AI Bot", "family": "Tools", "intention": "inference"},
      {"pattern": "ImagesiftBot", "name": "ImagesiftBot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "omgili", "name": "Omgili Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "omgilibot", "name": "Omgili Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ICC-Crawler", "name": "NICT AI Crawler", "family": "Specialized", "intention": "training"},
      {"pattern": "Webz.io", "name": "Webz.io Data Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Webzio-Extended", "name": "Webz.io Extended", "family": "Specialized", "intention": "scraping"},
      {"pattern": "webzio-extended", "name": "Webz.io Extended", "family": "Specialized", "intention": "scraping"},
      {"pattern": "FirecrawlAgent", "name": "Firecrawl AI Agent", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Crawlbase", "name": "Crawlbase Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "DataDome", "name": "DataDome Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Hexomatic", "name": "Hexomatic Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ParseHub", "name": "ParseHub Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ScrapingBee", "name": "ScrapingBee Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Zyte", "name": "Zyte Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "AkiraBot", "name": "Akira AI Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Kangaroo Bot", "name": "Kangaroo AI Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Brightbot", "name": "Bright Data Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "aiHitBot", "name": "aiHit Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "imageSpider", "name": "Image Spider AI", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ShapBot", "name": "Parallel ShapBot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "TerraCotta", "name": "Ceramic AI TerraCotta", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Thinkbot", "name": "Thinkbot Agency", "family": "Specialized", "intention": "scraping"},
      {"pattern": "WRTNBot", "name": "WRTN Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ZanistaBot", "name": "Zanista Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "VelenPublicWebCrawler", "name": "Velen AI Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "LinkupBot", "name": "Linkup Bot", "family": "Specialized", "intention": "search"},
      {"pattern": "Cotoyogi", "name": "ROIS Cotoyogi", "family": "Specialized", "intention": "training"},
      {"pattern": "MyCentralAIScraperBot", "name": "MyCentral AI Scraper", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Panscient", "name": "Panscient AI", "family": "Specialized", "intention": "training"},
      {"pattern": "SBIntuitionsBot", "name": "SB Intuitions Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "WARDBot", "name": "WEBSPARK WARDBot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Channel3Bot", "name": "Channel3 Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "AddSearchBot", "name": "AddSearch Bot", "family": "Specialized", "intention": "search"},
      {"pattern": "atlassian-bot", "name": "Atlassian AI Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "bigsur.ai", "name": "Big Sur AI", "family": "Specialized", "intention": "scraping"},
      {"pattern": "EchoboxBot", "name": "Echobox Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "LinerBot", "name": "Liner Bot", "family": "Specialized", "intention": "inference"},
      {"pattern": "Manus-User", "name": "Manus User", "family": "Specialized", "intention": "inference"},
      {"pattern": "Poggio-Citations", "name": "Poggio Citations", "family": "Specialized", "intention": "search"},
      {"pattern": "QualifiedBot", "name": "Qualified Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "YaK", "name": "Meltwater YaK", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Anomura", "name": "Anomura AI", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Awario", "name": "Awario Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Cloudflare-AutoRAG", "name": "Cloudflare AutoRAG", "family": "Specialized", "intention": "training"},
      {"pattern": "Crawlspace", "name": "Crawlspace AI", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Datenbank Crawler", "name": "Datenbank Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Echobot Bot", "name": "Echobot Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Factset_spyderbot", "name": "Factset Spyderbot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "LCC", "name": "LCC Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "netEstate Imprint Crawler", "name": "netEstate Imprint Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Poseidon Research Crawler", "name": "Poseidon Research Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Sidetrade indexer bot", "name": "Sidetrade Indexer", "family": "Specialized", "intention": "scraping"},
      {"pattern": "wpbot", "name": "WPBOT", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ExaBot", "name": "Exa AI Search Bot", "family": "Search AI", "intention": "search"},
      {"pattern": "TavilyBot", "name": "Tavily AI Search Bot", "family": "Search AI", "intention": "inference"},
      {"pattern": "kagi-fetcher", "name": "Kagi AI Fetcher", "family": "Search AI", "intention": "inference"},
      {"pattern": "ApifyBot", "name": "Apify Bot", "family": "Specialized", "intention": "scraping"},
      {"pattern": "ApifyWebsiteContentCrawler", "name": "Apify Website Content Crawler", "family": "Specialized", "intention": "scraping"},
      {"pattern": "Spider", "name": "Spider AI Crawler", "family": "Specialized", "intention": "scraping"}
    ],
    "scrapers": [
      {"pattern": "AhrefsBot", "name": "Ahrefs SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "SemrushBot", "name": "Semrush SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "MJ12bot", "name": "Majestic SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "DotBot", "name": "Moz SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "Screaming Frog", "name": "Screaming Frog SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "serpstatbot", "name": "Serpstat SEO", "family": "SEO", "intention": "scraping"},
      {"pattern": "BLEXBot", "name": "BLEXBot Scraper", "family": "Scrapers", "intention": "scraping"},
      {"pattern": "MegaIndex", "name": "MegaIndex Scraper", "family": "Scrapers", "intention": "scraping"},
      {"pattern": "DataForSeoBot", "name": "DataForSEO", "family": "Scrapers", "intention": "scraping"},
      {"pattern": "IbouBot", "name": "Ibou Search Engine", "family": "Scrapers", "intention": "scraping"},
      {"pattern": "Scrapy", "name": "Scrapy Framework", "family": "Scrapers", "intention": "scraping"},
      {"pattern": "SemrushBot-OCOB", "name": "Semrush Bot OCOB", "family": "SEO", "intention": "scraping"},
      {"pattern": "SemrushBot-SWA", "name": "Semrush Bot SWA", "family": "SEO", "intention": "scraping"}
    ],
    "legitimate_bots": [
      {"pattern": "Googlebot", "name": "Google Search", "family": "Google", "intention": "search"},
      {"pattern": "Googlebot-Image", "name": "Google Images", "family": "Google", "intention": "search"},
      {"pattern": "Googlebot-News", "name": "Google News", "family": "Google", "intention": "search"},
      {"pattern": "Googlebot-Video", "name": "Google Video", "family": "Google", "intention": "search"},
      {"pattern": "bingbot", "name": "Bing", "family": "Microsoft", "intention": "search"},
      {"pattern": "msnbot", "name": "MSN", "family": "Microsoft", "intention": "search"},
      {"pattern": "Slurp", "name": "Yahoo", "family": "Yahoo", "intention": "search"},
      {"pattern": "DuckDuckBot", "name": "DuckDuckGo", "family": "DuckDuckGo", "intention": "search"},
      {"pattern": "Baiduspider", "name": "Baidu", "family": "Baidu", "intention": "search"},
      {"pattern": "YandexBot", "name": "Yandex", "family": "Yandex", "intention": "search"},
      {"pattern": "Sogou", "name": "Sogou", "family": "Sogou", "intention": "search"},
      {"pattern": "Applebot", "name": "Apple Bot", "family": "Apple", "intention": "search"},
      {"pattern": "PetalBot", "name": "Huawei PetalBot", "family": "Huawei", "intention": "search"}
    ]
  }
}