#\r
#-------------------------------------------------------\r
\r
+# 2018-03-13 RobC \r
+# Added 36 robots and one generic pattern ('survey'), using the v7.7 robots file as a base.\r
+# Also moved robot "oBot" into the generic list so that it is singled out as an individual robot.\r
+#\r
# 2016-09-02 RobC \r
# Fixed a few errors and added a few missing bots from awstats 7.5 release.\r
#\r
# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.\r
#-------------------------------------------------------\r
\r
+\r
@RobotsSearchIDOrder_list1 = (\r
# Common robots (In robot file)\r
'bingbot/',\r
'Google[\x20]Web[\x20]Preview',\r
'Googlebot\-Image/',\r
'Googlebot\-Mobile/',\r
+'Google[\x20]Page[\x20]Speed',\r
'google\-sitemaps',\r
'Googlebot\-News',\r
'Googlebot\-Video/',\r
'AdnormCrawlerCatchBot/',\r
'aiHitBot/',\r
'aipbot/',\r
+'AlphaBot',\r
'Apache\-HttpClient/',\r
'Apexoo[\x20]Spider',\r
'Applebot/',\r
'archive\.org_bot',\r
'Babya[\x20]Discoverer',\r
+'Barkrowler',\r
'BDCbot/',\r
+'BellPagesCA/',\r
+'BeNosey[\x20]Mohawk[\x20]Search',\r
+'bhcBot',\r
+'BigBozz/',\r
'BinGet/',\r
'bl\.uk_lddc_bot/',\r
'BLEXBot/',\r
'boitho\.com\-dc/',\r
+'BoogleBot',\r
'BusinessBot:',\r
'CatchBot/',\r
'CB/Nutch',\r
'Curl/PHP',\r
'Dalvik/',\r
'DataCrawler/',\r
+'daumoa',\r
+'daum',\r
'Deepnet[\x20]Explorer',\r
'DeuSu/',\r
'Digincore',\r
'Discordbot/',\r
+'Dispatch/',\r
+'DnyzBot',\r
'DoCoMo/',\r
'Domain[\x20]Re\-Animator[\x20]Bot',\r
'DomainCrawler/',\r
'findlinks/',\r
'Findxbot/',\r
'FirePHP/',\r
+'firstdirectory\-bot',\r
'FlippyBearBot/',\r
+'^foo$',\r
'FreeWebMonitoring[\x20]SiteChecker/',\r
'fujilabol',\r
'FurlBot/',\r
'Girafabot',\r
'Gluten[\x20]Free[\x20]Crawler/',\r
'gocrawl',\r
+'Go\-http\-client/',\r
'GrapeshotCrawler/',\r
'GSiteCrawler/',\r
'GurujiBot/',\r
+'hadiBot',\r
'HaosouSpider',\r
+'HELLO[\x20]Crawler',\r
'holmes/',\r
+'houzzbot',\r
'HTTP_Request2/',\r
'HubSpot[\x20]Webcrawler',\r
'HyperCrawl/',\r
'ICC\-Crawler/',\r
'iconoclast',\r
'IDGCrawler/Nutch',\r
+'IDG/UK',\r
'idmarch[\x20]Automatic\.beta/',\r
+'InbyBot',\r
'Incutio[\x20]XML',\r
'InfluenceBot',\r
'IRLbot/',\r
'James[\x20]BOT',\r
'Jigsaw/',\r
'JobFeed',\r
+'Jooblebot',\r
'KomodiaBot/',\r
'Konqueror/',\r
'linkapediabot',\r
'LWP::Simple/',\r
'Mail\.RU_Bot/',\r
'meanpathbot/',\r
+'Mechanize',\r
'Mediatoolkitbot',\r
'MegaIndex\.ru/',\r
'merzscope',\r
'NerdyBot',\r
'netEstate[\x20]NE[\x20]Crawler',\r
'NetResearchServer/',\r
+'nominet',\r
'NRLCorpusBuilder/Nutch',\r
'nutch\-1\.4/',\r
'nutch\-1\.8/',\r
'NutchCVS/',\r
'o\.uk[\x20]robot',\r
-'oBot/',\r
'ocrawler;',\r
'ODP[\x20]link[\x20]checker',\r
'Offline[\x20]Explorer/',\r
'OmniExplorer_Bot/',\r
'OrangeBot/',\r
'PageBitesHyperBot/',\r
+'Pcore',\r
'pdffillerbot/',\r
+'peopleman',\r
'PhantomJS',\r
'PHP/5\.2\.8',\r
+'Pinterestbot',\r
'Ploetz[\x20]\+[\x20]Zeller',\r
'Plukkie/',\r
'Princetonbot/',\r
'PrivacyAwareBot/',\r
+'Prlog/',\r
'proximic',\r
'psbot/',\r
'psbot\-image',\r
'SafeSearch[\x20]microdata[\x20]crawler',\r
'safesearch',\r
'SBL\-BOT',\r
+'scrapy',\r
'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',\r
'ScreenerBot[\x20]Crawler[\x20]Beta',\r
+'Scrubby',\r
'Searchie/',\r
+'SecurityResearch\.bot',\r
'Seekmo',\r
'semanticbot',\r
'SemrushBot/',\r
'Shim\-Crawler',\r
'SiteExplorer/',\r
'siteexplorer\.info',\r
+'siteimprove',\r
'Slackbot\-LinkExpanding',\r
'SmabblerBot/',\r
'Sogou[\x20]web[\x20]spider/',\r
'special_archiver/',\r
'Spiderbot/',\r
+'SpuhexBot',\r
+'spyonweb',\r
'ssearch_bot',\r
'SurdotlyBot/',\r
'SurveyBot/',\r
'vBSEO',\r
'vBulletin[\x20]via[\x20]PHP',\r
'vebidoobot',\r
+'vegi[\x20]bot',\r
'viz/Nutch',\r
'VoilaBot',\r
'VORTEX/',\r
'W3C_Validator/',\r
'W3C\-checklink/',\r
'WBSearchBot/',\r
+'WbSrch/',\r
'WeSEE:Ads/PageBot',\r
'WeSEE:Ads/PictureBot',\r
'WeSEE_Bot',\r
'Yahoo[\x20]Link[\x20]Preview',\r
'YisouSpider',\r
'yoozBot',\r
+'Your\-Website\-Sucks',\r
'zspider/',\r
'ZumBot/',\r
# below placed at end to catch some generics\r
'MSIE[\x20]4',\r
'MSIE[\x20]5',\r
'MSIE[\x20]6',\r
+'MSIE\+6\.0\;',\r
'Windows[\x20]95',\r
'Windows[\x20]98',\r
\r
'cyberspyder',\r
'datafountains/dmoz_downloader',\r
'dataprovider\.com',\r
-'daumoa',\r
'daviesbot',\r
'daylifefeedfetcher',\r
'daypopbot',\r
@RobotsSearchIDOrder_listgen = (\r
# Generic robot\r
'robot',\r
+'oBot/',\r
'blog',\r
'checker',\r
'crawl',\r
'sitemap',\r
'spider',\r
'sucker',\r
+'survey',\r
'validator',\r
'bot[\s_+:,\.\;\/\\\-]',\r
'[\s_+:,\.\;\/\\\-]bot',\r
'Google[\x20]Web[\x20]Preview','Google Web Preview',\r
'Googlebot\-Image/','Googlebot-Image',\r
'Googlebot\-Mobile/','Googlebot-Mobile',\r
+'Google[\x20]Page[\x20]Speed','Google Page Speed',\r
'google\-sitemaps','google-sitemaps',\r
'Googlebot\-News','Googlebot-News',\r
'Googlebot\-Video/','Googlebot-Video',\r
'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',\r
'aiHitBot/','aiHitBot',\r
'aipbot/','aipbot',\r
+'AlphaBot','AlphaBot',\r
'Apache\-HttpClient/','Apache-HttpClient',\r
'Apexoo[\x20]Spider','Apexoo Spider',\r
'Applebot/','Applebot',\r
'archive\.org_bot','archive.org_bot',\r
'Babya[\x20]Discoverer','Babya Discoverer',\r
+'Barkrowler','Barkrowler',\r
'BDCbot/','BDCbot',\r
+'BellPagesCA/','BellPagesCA',\r
+'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',\r
+'bhcBot','bhcBot',\r
+'BigBozz/','BigBozz',\r
'BinGet/','BinGet',\r
'bl\.uk_lddc_bot/','bl.uk_lddc_bot',\r
'BLEXBot/','BLEXBot',\r
'boitho\.com\-dc/','boitho.com-dc',\r
+'BoogleBot','BoogleBot',\r
'BusinessBot:','BusinessBot:',\r
'CatchBot/','CatchBot',\r
'CB/Nutch','CB/Nutch',\r
'Curl/PHP','Curl/PHP',\r
'Dalvik/','Dalvik',\r
'DataCrawler/','DataCrawler',\r
+'daumoa','daumoa',\r
+'daum','daum',\r
'Deepnet[\x20]Explorer','Deepnet Explorer',\r
'DeuSu/','DeuSu',\r
'Digincore','Digincore',\r
'Discordbot/','Discordbot',\r
+'Dispatch/','Dispatch',\r
+'DnyzBot','DnyzBot',\r
'DoCoMo/','DoCoMo',\r
'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot',\r
'DomainCrawler/','DomainCrawler',\r
'findlinks/','findlinks',\r
'Findxbot/','Findxbot',\r
'FirePHP/','FirePHP',\r
+'firstdirectory\-bot','firstdirectory-bot',\r
'FlippyBearBot/','FlippyBearBot',\r
+'^foo$','foo',\r
'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',\r
'fujilabol','fujilabol',\r
'FurlBot/','FurlBot',\r
'Girafabot','Girafabot',\r
'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',\r
'gocrawl','gocrawl',\r
+'Go\-http\-client/','Go-http-client',\r
'GrapeshotCrawler/','GrapeshotCrawler',\r
'GSiteCrawler/','GSiteCrawler',\r
'GurujiBot/','GurujiBot',\r
+'hadiBot','hadiBot',\r
'HaosouSpider','HaosouSpider',\r
+'HELLO[\x20]Crawler','HELLO Crawler',\r
'holmes/','holmes',\r
+'houzzbot','houzzbot',\r
'HTTP_Request2/','HTTP_Request2',\r
'HubSpot[\x20]Webcrawler','HubSpot Webcrawler',\r
'HyperCrawl/','HyperCrawl',\r
'ICC\-Crawler/','ICC-Crawler',\r
'iconoclast','iconoclast',\r
'IDGCrawler/Nutch','IDGCrawler/Nutch',\r
+'IDG/UK','IDG/UK',\r
'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',\r
+'InbyBot','InbyBot',\r
'Incutio[\x20]XML','Incutio XML',\r
'InfluenceBot','InfluenceBot',\r
'IRLbot/','IRLbot',\r
'James[\x20]BOT','James BOT',\r
'Jigsaw/','Jigsaw',\r
'JobFeed','JobFeed',\r
+'Jooblebot','Jooblebot',\r
'KomodiaBot/','KomodiaBot',\r
'Konqueror/','Konqueror',\r
'linkapediabot','linkapediabot',\r
'LWP::Simple/','LWP::Simple',\r
'Mail\.RU_Bot/','Mail.RU Bot',\r
'meanpathbot/','meanpathbot',\r
+'Mechanize','Mechanize',\r
'Mediatoolkitbot','Mediatoolkitbot',\r
'MegaIndex\.ru/','MegaIndex.ru',\r
'merzscope','merzscope',\r
'NerdyBot','NerdyBot',\r
'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',\r
'NetResearchServer/','NetResearchServer',\r
+'nominet','nominet',\r
'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',\r
'nutch\-1\.4/','nutch-1.4',\r
'nutch\-1\.8/','nutch-1.8',\r
'NutchCVS/','NutchCVS',\r
'o\.uk[\x20]robot','o.uk robot',\r
-'oBot/','oBot',\r
'ocrawler;','ocrawler;',\r
'ODP[\x20]link[\x20]checker','ODP link checker',\r
'Offline[\x20]Explorer/','Offline Explorer',\r
'OmniExplorer_Bot/','OmniExplorer_Bot',\r
'OrangeBot/','OrangeBot',\r
'PageBitesHyperBot/','PageBitesHyperBot',\r
+'Pcore','Pcore',\r
'pdffillerbot/','pdffillerbot',\r
+'peopleman','peopleman',\r
'PhantomJS','PhantomJS',\r
'PHP/5\.2\.8','PHP/5.2.8',\r
+'Pinterestbot','Pinterestbot',\r
'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',\r
'Plukkie/','Plukkie',\r
'Princetonbot/','Princetonbot',\r
'PrivacyAwareBot/','PrivacyAwareBot',\r
+'Prlog/','Prlog',\r
'proximic','proximic',\r
'psbot/','psbot',\r
'psbot\-image','psbot-image',\r
'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',\r
'safesearch','safesearch ( catchall )',\r
'SBL\-BOT','SBL-BOT',\r
+'scrapy','scrapy',\r
'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider',\r
'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta',\r
+'Scrubby','Scrubby',\r
'Searchie/','Searchie',\r
+'SecurityResearch\.bot','Security Research Bot',\r
'Seekmo','Seekmo',\r
'semanticbot','semanticbot',\r
'SemrushBot/','SemrushBot',\r
'Shim\-Crawler','Shim-Crawler',\r
'SiteExplorer/','SiteExplorer',\r
'siteexplorer\.info','siteexplorer.info',\r
+'siteimprove','siteimprove',\r
'Slackbot\-LinkExpanding','Slackbot-LinkExpanding',\r
'SmabblerBot/','SmabblerBot',\r
'Sogou[\x20]web[\x20]spider/','Sogou web spider',\r
'special_archiver/','special_archiver',\r
'Spiderbot/','Spiderbot',\r
+'SpuhexBot','SpuhexBot',\r
+'spyonweb','spyonweb',\r
'ssearch_bot','ssearch_bot',\r
'SurdotlyBot/','SurdotlyBot',\r
'SurveyBot/','SurveyBot',\r
'vBSEO','vBSEO',\r
'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',\r
'vebidoobot','vebidoobot',\r
+'vegi[\x20]bot','vegi bot',\r
'viz/Nutch','viz/Nutch',\r
'VoilaBot','VoilaBot',\r
'VORTEX/','VORTEX',\r
'W3C_Validator/','W3C_Validator',\r
'W3C\-checklink/','W3C-checklink',\r
'WBSearchBot/','WBSearchBot',\r
+'WbSrch/','WbSrch/',\r
'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot',\r
'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot',\r
'WeSEE_Bot','WeSEE_Bot',\r
'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',\r
'YisouSpider','YisouSpider',\r
'yoozBot','yoozBot',\r
+'Your\-Website\-Sucks','Your-Website-Sucks',\r
'zspider/','zspider',\r
'ZumBot/','ZumBot',\r
# below placed at end to catch some generics\r
'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )',\r
'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )',\r
'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )',\r
+'MSIE\+6\.0\;','MSIE+6.0; - ( Rogue Robot)',\r
'Windows[\x20]95','Windows 95 - ( Rogue Robot )',\r
'Windows[\x20]98','Windows 98 - ( Rogue Robot )',\r
\r
'cyberspyder','cyberspyder',\r
'datafountains/dmoz_downloader','datafountains/dmoz_downloader',\r
'dataprovider\.com','dataprovider.com',\r
-'daumoa','daumoa',\r
'daviesbot','daviesbot',\r
'daylifefeedfetcher','daylifefeedfetcher',\r
'daypopbot','daypopbot',\r
\r
# Generic robot\r
'robot','robot',\r
+'oBot/','oBot',\r
'blog','blog',\r
'checker','checker',\r
'crawl','crawl',\r
'sitemap','sitemap',\r
'spider','spider',\r
'sucker','sucker',\r
+'survey','survey',\r
'validator','validator',\r
'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*',\r
'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot',\r