'baidu',\r
'YandexBot/',\r
'YandexImages/',\r
+'YandexImageResizer',\r
'YandexMetrika/',\r
'YandexMobileBot/',\r
'yandex',\r
'Abrave',\r
'acapbot/',\r
'Accoona\-AI\-Agent/',\r
+'arcemedia',\r
'AdnormCrawlerCatchBot/',\r
+'adscanner',\r
'aiHitBot/',\r
'aipbot/',\r
'AlphaBot',\r
'BellPagesCA/',\r
'BeNosey[\x20]Mohawk[\x20]Search',\r
'bhcBot',\r
+'bidswitchbot',\r
'BigBozz/',\r
'BinGet/',\r
+'bitlybot',\r
'bl\.uk_lddc_bot/',\r
'BLEXBot/',\r
+'bnf.fr_bot',\r
'boitho\.com\-dc/',\r
'BoogleBot',\r
'BusinessBot:',\r
'CCBot/',\r
'Cliqzbot/',\r
'CMS[\x20]Crawler',\r
+'Companybook\-Crawler',\r
'ConveraCrawler/',\r
+'Contacts-Crawler',\r
+'contxbot',\r
'cosmos/',\r
'crawl/Nutch',\r
'crawler4j',\r
'CRAZYWEBCRAWLER',\r
+'CRMNLCrawlAgent',\r
'CSE[\x20]HTML[\x20]Validator',\r
'C\-T[\x20]bot',\r
+'CUBOT',\r
'Curl/PHP',\r
+'cyencebot',\r
'Dalvik/',\r
'DataCrawler/',\r
'daumoa',\r
'DomainSONOCrawler/',\r
'DomainStatsBot/',\r
'DotBot/',\r
+'DuckDuckBot-Https',\r
'DuckDuckGo\-Favicons\-Bot/',\r
'ELinks/',\r
'ELinks[\x20]\(',\r
'EmailMarketingRobot/',\r
'EmeraldShield\.com[\x20]WebBot',\r
'envolk\[ITS\]spider/',\r
+'eright',\r
'EsperanzaBot',\r
'Exabot/',\r
+'ExtLinksBot',\r
+'ExperianCrawlUK',\r
'facebookexternalhit/',\r
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',\r
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',\r
'Findxbot/',\r
'FirePHP/',\r
'firstdirectory\-bot',\r
+'flamingo',\r
'FlippyBearBot/',\r
'^foo$',\r
'FreeWebMonitoring[\x20]SiteChecker/',\r
'Gaisbot/',\r
'Gallent[\x20]Spider',\r
'GarlikCrawler/',\r
+'Getintent[\x20]Crawler',\r
'GetintentCrawler[\x20]getintent\.com',\r
'Gigabot/',\r
'gipo\-crawler/Nutch',\r
'Girafabot',\r
'Gluten[\x20]Free[\x20]Crawler/',\r
'gocrawl',\r
+'Gowikibot',\r
'Go\-http\-client/',\r
'GrapeshotCrawler/',\r
'GSiteCrawler/',\r
'idmarch[\x20]Automatic\.beta/',\r
'InbyBot',\r
'Incutio[\x20]XML',\r
+'IndeedBot',\r
'InfluenceBot',\r
'IRLbot/',\r
'IssueCrawler',\r
'Jooblebot',\r
'KomodiaBot/',\r
'Konqueror/',\r
+'laserlikebot',\r
+'Lightspeed',\r
'linkapediabot',\r
'metager\-linkchecker',\r
'linkchecker',\r
'LinksManager\.com_bot',\r
'LWP::Simple/',\r
'Mail\.RU_Bot/',\r
+'makecontact',\r
+'mappy',\r
+'MauiBot',\r
'meanpathbot/',\r
'Mechanize',\r
'Mediatoolkitbot',\r
'MegaIndex\.ru/',\r
'merzscope',\r
+'Meta_Bot',\r
'mfibot/',\r
'microsoft.*discovery',\r
'missigua_locator',\r
'MixrankBot',\r
'MJ12bot/',\r
-'MojeekBot/',\r
+'MojeekBot',\r
'Mojolicious',\r
'MXT/Nutch',\r
'My[\x20]Nutch[\x20]Spider/',\r
'myse/Nutch',\r
+'Naaraa',\r
'NerdyBot',\r
'netEstate[\x20]NE[\x20]Crawler',\r
'NetResearchServer/',\r
+'Nimbostratus-Bot',\r
'nominet',\r
'NRLCorpusBuilder/Nutch',\r
'nutch\-1\.4/',\r
'Offline[\x20]Explorer/',\r
'OmniExplorer_Bot/',\r
'OrangeBot/',\r
+'Orliac',\r
+'OutclicksBot',\r
'PageBitesHyperBot/',\r
'Pcore',\r
'pdffillerbot/',\r
'PhantomJS',\r
'PHP/5\.2\.8',\r
'Pinterestbot',\r
+'PiplBot',\r
'Ploetz[\x20]\+[\x20]Zeller',\r
'Plukkie/',\r
'Princetonbot/',\r
'python_wk_crawler',\r
'Python\-urllib/',\r
'QCrawl',\r
+'Quick-Crawler',\r
'ResearchBot',\r
'roboto',\r
'rogerbot/',\r
'Seekmo',\r
'semanticbot',\r
'SemrushBot/',\r
+'SemrushBot-SI',\r
'seo\-audit\-check\-bot/',\r
'Seobility',\r
'SEOkicks\-Robot',\r
'SEOstats',\r
'Seosys/Nutch',\r
'Seoterritory\.com[\x20]bot',\r
+'serendeputy',\r
'Shim\-Crawler',\r
'SiteExplorer/',\r
'siteexplorer\.info',\r
'SpuhexBot',\r
'spyonweb',\r
'ssearch_bot',\r
+'Streamline3Bot',\r
'SurdotlyBot/',\r
'SurveyBot/',\r
'taiil/Nutch',\r
'Test[\x20]Spider',\r
'TestCrawler',\r
'The[\x20]Knowledge[\x20]AI',\r
+'tracemyfile',\r
+'trendiction',\r
'TurnitinBot/',\r
'TurnitinBot',\r
'TweetmemeBot/',\r
'UCY/Nutch',\r
+'uni-leipzig\.de',\r
'Uptimebot/',\r
'UptimeRobot/',\r
'URL[\x20]Checker',\r
'vBulletin[\x20]via[\x20]PHP',\r
'vebidoobot',\r
'vegi[\x20]bot',\r
+'Velen',\r
'viz/Nutch',\r
'VoilaBot',\r
'VORTEX/',\r
'XoviBot/',\r
'yacybot',\r
'Yahoo[\x20]Link[\x20]Preview',\r
+'yak',\r
'YisouSpider',\r
'yoozBot',\r
'Your\-Website\-Sucks',\r
+'zoominfobot',\r
'zspider/',\r
'ZumBot/',\r
# below placed at end to catch some generics\r
'ucsd',\r
'udmsearch',\r
'ultraseek',\r
+'um\-IC',\r
+'um\-LN',\r
'unchaos_bot_hybrid_web_search_engine',\r
'unido\-bot',\r
'unisterbot',\r
'baidu','Baidu ( catchall )',\r
'YandexBot/','YandexBot',\r
'YandexImages/','YandexImages',\r
+'YandexImageResizer','YandexImageResizer',\r
'YandexMetrika/','YandexMetrika',\r
'YandexMobileBot/','YandexMobileBot',\r
'yandex','Yandex ( catchall )',\r
'acapbot/','acapbot',\r
'Accoona\-AI\-Agent/','Accoona-AI-Agent',\r
'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',\r
+'adscanner','adscanner',\r
'aiHitBot/','aiHitBot',\r
'aipbot/','aipbot',\r
'AlphaBot','AlphaBot',\r
'Apache\-HttpClient/','Apache-HttpClient',\r
'Apexoo[\x20]Spider','Apexoo Spider',\r
'Applebot/','Applebot',\r
+'arcemedia','AdsBot-ArceMedia',\r
'archive\.org_bot','archive.org_bot',\r
'Babya[\x20]Discoverer','Babya Discoverer',\r
'Barkrowler','Barkrowler',\r
'BellPagesCA/','BellPagesCA',\r
'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',\r
'bhcBot','bhcBot',\r
+'bidswitchbot','bidswitchbot',\r
'BigBozz/','BigBozz',\r
'BinGet/','BinGet',\r
+'bitlybot','bit.ly',\r
'bl\.uk_lddc_bot/','bl.uk_lddc_bot',\r
'BLEXBot/','BLEXBot',\r
+'bnf.fr_bot','bnf.fr_bot',\r
'boitho\.com\-dc/','boitho.com-dc',\r
'BoogleBot','BoogleBot',\r
'BusinessBot:','BusinessBot:',\r
'CCBot/','CCBot',\r
'Cliqzbot/','Cliqzbot',\r
'CMS[\x20]Crawler','CMS Crawler',\r
+'Companybook\-Crawler','Companybook-Crawler',\r
'ConveraCrawler/','ConveraCrawler',\r
+'Contacts-Crawler','Contacts-Crawler',\r
+'contxbot','contxbot',\r
'cosmos/','cosmos',\r
+'CRMNLCrawlAgent','CRMNLCrawlAgent',\r
'crawl/Nutch','crawl/Nutch',\r
'crawler4j','crawler4j',\r
'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER',\r
'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator',\r
'C\-T[\x20]bot','C-T bot',\r
+'CUBOT','CUBOT',\r
'Curl/PHP','Curl/PHP',\r
+'cyencebot','cyencebot',\r
'Dalvik/','Dalvik',\r
'DataCrawler/','DataCrawler',\r
'daumoa','daumoa',\r
'DomainSONOCrawler/','DomainSONOCrawler',\r
'DomainStatsBot/','DomainStatsBot',\r
'DotBot/','DotBot',\r
+'DuckDuckBot-Https','DuckDuckBot-Https',\r
'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot',\r
'ELinks/','ELinks',\r
'ELinks[\x20]\(','ELinks (',\r
'EmailMarketingRobot/','EmailMarketingRobot',\r
'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot',\r
'envolk\[ITS\]spider/','envolk ITS spider',\r
+'eright','eright',\r
'EsperanzaBot','EsperanzaBot',\r
'Exabot/','Exabot',\r
+'ExtLinksBot','ExtLinksBot',\r
+'ExperianCrawlUK','ExperianCrawlUK',\r
'facebookexternalhit/','facebookexternalhit',\r
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de',\r
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de',\r
'Findxbot/','Findxbot',\r
'FirePHP/','FirePHP',\r
'firstdirectory\-bot','firstdirectory-bot',\r
+'flamingo','Flamingo_SearchEngine',\r
'FlippyBearBot/','FlippyBearBot',\r
'^foo$','foo',\r
'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',\r
'Gaisbot/','Gaisbot',\r
'Gallent[\x20]Spider','Gallent Spider',\r
'GarlikCrawler/','GarlikCrawler',\r
+'Getintent[\x20]Crawler','GetIntent Crawler',\r
'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com',\r
'Gigabot/','Gigabot',\r
'gipo\-crawler/Nutch','gipo-crawler/Nutch',\r
'Girafabot','Girafabot',\r
'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',\r
'gocrawl','gocrawl',\r
+'Gowikibot','Gowikibot',\r
'Go\-http\-client/','Go-http-client',\r
'GrapeshotCrawler/','GrapeshotCrawler',\r
'GSiteCrawler/','GSiteCrawler',\r
'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',\r
'InbyBot','InbyBot',\r
'Incutio[\x20]XML','Incutio XML',\r
+'IndeedBot','IndeedBot',\r
'InfluenceBot','InfluenceBot',\r
'IRLbot/','IRLbot',\r
'IssueCrawler','IssueCrawler',\r
'Jooblebot','Jooblebot',\r
'KomodiaBot/','KomodiaBot',\r
'Konqueror/','Konqueror',\r
+'Lightspeed','Lightspeed',\r
'linkapediabot','linkapediabot',\r
'metager\-linkchecker','metager-linkchecker',\r
'linkchecker','linkchecker',\r
'LinksManager\.com_bot','LinksManager.com_bot',\r
'LWP::Simple/','LWP::Simple',\r
'Mail\.RU_Bot/','Mail.RU Bot',\r
+'makecontact','makecontact',\r
+'mappy','Mappy Crawler',\r
+'MauiBot','MauiBot',\r
'meanpathbot/','meanpathbot',\r
'Mechanize','Mechanize',\r
'Mediatoolkitbot','Mediatoolkitbot',\r
'MegaIndex\.ru/','MegaIndex.ru',\r
'merzscope','merzscope',\r
+'Meta_Bot','Meta_Bot',\r
'mfibot/','mfibot',\r
'microsoft.*discovery','Microsoft Office Protocol Discovery',\r
'missigua_locator','missigua_locator',\r
'MixrankBot','MixrankBot',\r
'MJ12bot/','MJ12bot',\r
+'mojeek','mojeek',\r
'MojeekBot/','MojeekBot',\r
'Mojolicious','Mojolicious',\r
'MXT/Nutch','MXT/Nutch',\r
'My[\x20]Nutch[\x20]Spider/','My Nutch Spider',\r
'myse/Nutch','myse/Nutch',\r
+'Naaraa','Naaraa',\r
'NerdyBot','NerdyBot',\r
'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',\r
'NetResearchServer/','NetResearchServer',\r
+'Nimbostratus-Bot','Nimbostratus-Bot',\r
'nominet','nominet',\r
'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',\r
'nutch\-1\.4/','nutch-1.4',\r
'Offline[\x20]Explorer/','Offline Explorer',\r
'OmniExplorer_Bot/','OmniExplorer_Bot',\r
'OrangeBot/','OrangeBot',\r
+'Orliac','Orliac',\r
+'OutclicksBot','OutclicksBot',\r
'PageBitesHyperBot/','PageBitesHyperBot',\r
'Pcore','Pcore',\r
'pdffillerbot/','pdffillerbot',\r
'PhantomJS','PhantomJS',\r
'PHP/5\.2\.8','PHP/5.2.8',\r
'Pinterestbot','Pinterestbot',\r
+'PiplBot','PiplBot',\r
'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',\r
'Plukkie/','Plukkie',\r
'Princetonbot/','Princetonbot',\r
'python_wk_crawler','python_wk_crawler',\r
'Python\-urllib/','Python-urllib',\r
'QCrawl','QCrawl',\r
+'Quick-Crawler','Quick-Crawler',\r
'ResearchBot','ResearchBot',\r
'roboto','roboto',\r
'rogerbot/','rogerbot',\r
'Seekmo','Seekmo',\r
'semanticbot','semanticbot',\r
'SemrushBot/','SemrushBot',\r
+'SemrushBot-SI','SemrushBot-SI',\r
'seo\-audit\-check\-bot/','seo-audit-check-bot',\r
'Seobility','Seobility',\r
'SEOkicks\-Robot','SEOkicks-Robot',\r
'SEOstats','SEOstats',\r
'Seosys/Nutch','Seosys/Nutch',\r
'Seoterritory\.com[\x20]bot','Seoterritory.com.bot',\r
+'serendeputy','serendeputy',\r
'Shim\-Crawler','Shim-Crawler',\r
'SiteExplorer/','SiteExplorer',\r
'siteexplorer\.info','siteexplorer.info',\r
'SpuhexBot','SpuhexBot',\r
'spyonweb','spyonweb',\r
'ssearch_bot','ssearch_bot',\r
+'Streamline3Bot','Streamline3Bot',\r
'SurdotlyBot/','SurdotlyBot',\r
'SurveyBot/','SurveyBot',\r
'taiil/Nutch','taiil/Nutch',\r
'Test[\x20]Spider','Test Spider',\r
'TestCrawler','TestCrawler',\r
'The[\x20]Knowledge[\x20]AI', 'The Knowledge AI',\r
+'tracemyfile','tracemyfile',\r
+'trendiction','trendiction',\r
'TurnitinBot/','TurnitinBot',\r
'TurnitinBot','TurnitinBot',\r
'TweetmemeBot/','TweetmemeBot',\r
'UCY/Nutch','UCY/Nutch',\r
+'uni-leipzig\.de','uni-leipzig.de',\r
'Uptimebot/','Uptimebot',\r
'UptimeRobot/','UptimeRobot',\r
'URL[\x20]Checker','URL Checker',\r
'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',\r
'vebidoobot','vebidoobot',\r
'vegi[\x20]bot','vegi bot',\r
+'Velen','Velen',\r
'viz/Nutch','viz/Nutch',\r
'VoilaBot','VoilaBot',\r
'VORTEX/','VORTEX',\r
'XoviBot/','XoviBot',\r
'yacybot','yacybot',\r
'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',\r
+'yak','yak-linkfluence',\r
'YisouSpider','YisouSpider',\r
'yoozBot','yoozBot',\r
'Your\-Website\-Sucks','Your-Website-Sucks',\r
+'zoominfobot','zoominfobot',\r
'zspider/','zspider',\r
'ZumBot/','ZumBot',\r
# below placed at end to catch some generics\r
'ucsd','ucsd',\r
'udmsearch','udmsearch',\r
'ultraseek','ultraseek',\r
+'um\-IC','ubermetrics-technologies.com',\r
+'um\-LN','ubermetrics-technologies.com',\r
'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine',\r
'unido\-bot','unido-bot',\r
'unisterbot','unisterbot',\r