# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
# -- fix - some robots were reported with _ where _ should have been a space.
# changed Xenu Link Sleuth
-# changed microsoft[_+ ]url[_+ ]control -> microsoft_url_control
+# changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
# changed favorites_sweeper -> favorites_sweeper
# -- updates
# updated AskJeeves to Ask
# used to know in which order to search Robot IDs.
# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
-# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+ ]' and are quoted.
+# Note: Robot IDs are in lower case; '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
#-------------------------------------------------------
@RobotsSearchIDOrder_list1 = (
# Common robots (In robot file)
'googlebot\-image',
'googlebot',
'google\-sitemaps',
-'google[_+ ]web[_+ ]preview',
+'google[_+\s]web[_+\s]preview',
'grabber',
'gulliver',
-'virus[_+ ]detector', # Must be before harvest
+'virus[_+\s]detector', # Must be before harvest
'harvest',
'htdig',
'jeeves',
'linkwalker',
'lilina',
-'lycos[_+ ]',
+'lycos[_+\s]',
'moget',
'muscatferret',
'myweb',
'kapsi',
'katipo',
'kilroy',
-'ko[_+ ]yappo[_+ ]robot',
+'ko[_+\s]yappo[_+\s]robot',
'kummhttp',
'labelgrabber\.txt',
'larbin',
'snooper',
'solbot',
'speedy',
-'spider[_+ ]monkey',
+'spider[_+\s]monkey',
'spiderbot',
'spiderline',
'spiderman',
'betabot',
'biglotron',
'bittorrent_bot',
-'biz360[_+ ]spider',
-'blogbridge[_+ ]service',
+'biz360[_+\s]spider',
+'blogbridge[_+\s]service',
'bloglines',
'blogpulse',
'blogsearch',
'boris',
'bubing',
'bumblebee',
-'candlelight[_+ ]favorites[_+ ]inspector',
+'candlelight[_+\s]favorites[_+\s]inspector',
'careerbot',
'cbn00glebot',
'cerberian_drtrs',
'edgeio\-retriever',
'ets_v',
'exactseek',
-'extreme[_+ ]picture[_+ ]finder',
+'extreme[_+\s]picture[_+\s]finder',
'eventax',
'everbeecrawler',
'everest\-vulcan',
'hoowwwer',
'hpprint',
'htmlparser',
-'html[_+ ]link[_+ ]validator',
+'html[_+\s]link[_+\s]validator',
'httrack',
'hundesuche\.com\-bot',
'i-bot',
'infomine',
'insurancobot',
'integromedb\.org',
-'internet[_+ ]ninja',
+'internet[_+\s]ninja',
'internetarchive',
'internetseer',
'internetsupervision',
'isearch2006',
'istellabot',
'iupui_research_bot',
-'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
'justview',
'kalambot',
'kamano\.de_newsfeedverzeichnis',
'miadev',
'microsoft bits',
'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
-'microsoft[_+ ]url[_+ ]control',
+'microsoft[_+\s]url[_+\s]control',
'mini\-reptile',
'minirank',
'missigua_locator',
'nutch', # Must come after other nutch versions
'ocelli',
'octora_beta_bot',
-'omniexplorer[_+ ]bot',
-'onet\.pl[_+ ]sa',
+'omniexplorer[_+\s]bot',
+'onet\.pl[_+\s]sa',
'onfolio',
'opentaggerbot',
'openwebspider',
'pear_http_request_class',
'peerbot',
'perman',
-'php[_+ ]version[_+ ]tracker',
+'php[_+\s]version[_+\s]tracker',
'pictureofinternet',
'ping\.blo\.gs',
'plinki',
'sbider',
'schizozilla',
'scumbot',
-'searchguild[_+ ]dmoz[_+ ]experiment',
+'searchguild[_+\s]dmoz[_+\s]experiment',
'searchmetricsbot',
'seekbot',
'semrushbot',
'vortex',
'vse\/',
'w3c\-checklink',
-'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
'w3c_validator',
'watchmouse',
'wavefire',
'webfilter',
'webindexer',
'webminer',
-'website[_+ ]monitoring[_+ ]bot',
+'website[_+\s]monitoring[_+\s]bot',
'webvulncrawl',
'wells_search',
'wesee:search',
'^motorola$',
'movabletype',
# These appear to be bots trying to hide. All of the usual architecture data is missing.
-'^mozilla\/3\.0 \(compatible$',
+'^mozilla\/3\.0\s\(compatible$',
'^mozilla\/4\.0$',
-'^mozilla\/4\.0 \(compatible;\)$',
+'^mozilla\/4\.0\s\(compatible;\)$',
'^mozilla\/5\.0$',
-'^mozilla\/5\.0 \(compatible;$',
-'^mozilla\/5\.0 \(en\-us\)$',
-'^mozilla\/5\.0 firefox\/3\.0\.5$',
+'^mozilla\/5\.0\s\(compatible;$',
+'^mozilla\/5\.0\s\(en\-us\)$',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
'^msie',
# End of hiding bots.
'netnewswire',
'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page [new window]" target="_blank">Googlebot</a>',
'google\-sitemaps', 'Google Sitemaps',
'grabber', '<a href="http://www.sdsc.edu/" title="Seltsame Aktivitaeten vom San Diego Supercomputer Center [new window]" target="_blank">Grabber (SDSC)</a>',
-'google[_+ ]web[_+ ]preview', 'Google Web Preview',
+'google[_+\s]web[_+\s]preview', 'Google Web Preview',
'gulliver','Northern Light Gulliver',
-'virus[_+ ]detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
+'virus[_+\s]detector','<a href="http://www.securecomputing.com/" title="virus_harvester@securecomputing.com; Bot home page [new window]" target="_blank">virus_detector</a>',
'harvest','Harvest',
'htdig','ht://Dig',
'jeeves','<a href="http://sp.ask.com/docs/about/tech_crawling.html" title="Bot home page [new window]" target="_blank">Ask</a>',
'linkwalker','LinkWalker',
'lilina','Lilina',
-'lycos[_+ ]','Lycos',
+'lycos[_+\s]','Lycos',
'moget','moget',
'muscatferret','Muscat Ferret',
'myweb','Internet Shinchakubin',
'kapsi','image.kapsi.net',
'katipo','Katipo',
'kilroy','Kilroy',
-'ko[_+ ]yappo[_+ ]robot','KO_Yappo_Robot',
+'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',
'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page [new window]" target="_blank">KummHttp</a>',
'labelgrabber\.txt','LabelGrabber',
'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page [new window]" target="_blank">larbin</a>',
'snooper','Snooper',
'solbot','Solbot',
'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" title="Speedy Spider home page [new window]" target="_blank">Speedy Spider</a>',
-'spider[_+ ]monkey','Spider monkey',
+'spider[_+\s]monkey','Spider monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
'betabot','BetaBot',
'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page [new window]" target="_blank">Biglotron</a>',
'bittorrent_bot','<a href="http://www.bittorrent.com/" title="Bot home page [new window]" target="_blank">BitTorrent Bot</a>',
-'biz360[_+ ]spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
-'blogbridge[_+ ]service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
+'biz360[_+\s]spider','<a href="http://www.biz360.com/" title="blogsmanager@biz360.com Bot home page [new window]" target="_blank">Biz360 spider</a>',
+'blogbridge[_+\s]service','<a href="http://www.blogbridge.com/" title="Bot home page [new window]" target="_blank">BlogBridge Service</a>',
'bloglines','<a href="http://www.bloglines.com/" title="Bot home page [new window]" target="_blank">Bloglines</a>',
'blogpulse','<a href="http://www.intelliseek.com/" title="Bot home page [new window]" target="_blank">BlogPulse ISSpider intelliseek.com</a>',
'blogsearch','<a href="http://www.icerocket.com/" title="Bot home page [new window]" target="_blank">BlogSearch</a>',
'boris', 'Boris',
'bubing', '<a href="http://law.di.unimi.it/BUbiNG.html" title="BUbiNG [new window]" target="_blank">BUbiNG</a>',
'bumblebee', 'Bumblebee (relevare.com)',
-'candlelight[_+ ]favorites[_+ ]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
+'candlelight[_+\s]favorites[_+\s]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
'careerbot', '<a href="http://www.career-x.de/bot.html" rel="nofollow" title="CareerBot home page [new window]" target="_blank">CareerBot</a>',
'cbn00glebot','cbn00glebot',
'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',
'edgeio\-retriever','<a href="http://www.edgeio.com/" title="Bot home page [new window]" target="_blank">edgeio-retriever</a>',
'ets_v','<a href="http://www.freetranslation.com/help/" title="ETS home page [new window]" target="_blank">ETS</a> Enterprise Translation Server',
'exactseek','ExactSeek Crawler',
-'extreme[_+ ]picture[_+ ]finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
+'extreme[_+\s]picture[_+\s]finder','<a href="http://www.exisoftware.com/" title="Extreme_Picture_Finder home page [new window]" target="_blank">Extreme_Picture_Finder</a>',
'eventax','<a href="http://www.eventax.de/" title="eventax home page [new window]" target="_blank">eventax</a>',
'everbeecrawler','EverbeeCrawler',
'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page [new window]" target="_blank">Everest-Vulcan</a>',
'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page [new window]" target="_blank">HooWWWer</a>',
'hpprint','HPPrint',
'htmlparser','<a href="http://htmlparser.sourceforge.net/" title="HTMLParser home page [new window]" target="_blank">HTMLParser</a>',
-'html[_+ ]link[_+ ]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
+'html[_+\s]link[_+\s]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',
'i-bot','i-bot',
'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',
'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',
'integromedb\.org','<a href="http://www.integromedb.org/Crawler" title="IntegromeDB home page [new window]" target="_blank">IntegromeDB</a>',
-'internet[_+ ]ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
+'internet[_+\s]ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',
'internetseer', 'InternetSeer',
'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',
'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',
'istellabot', '<a href="http://www.tiscali.it/" title="IstellaBot [new window]" target="_blank">IstellaBot</a>',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
-'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
+'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
'justview', 'JustView',
'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page [new window]" target="_blank">KalamBot</a>',
'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de/" title="kamano.de NewsFeedVerzeichnis home page [new window]" target="_blank">kamano.de NewsFeedVerzeichnis</a>',
'miadev', '<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider [new window]" target="_blank">MiaDev spider</a>',
'microsoft bits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',
-'microsoft[_+ ]url[_+ ]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
+'microsoft[_+\s]url[_+\s]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
'mini\-reptile','Mini-reptile',
'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator home page [new window]" target="_blank">Missigua_Locator</a>',
'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart. [new window]" target="_blank">Nutch</a>',
'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page [new window]" target="_blank">Ocelli</a>',
'octora_beta_bot','<a href="http://www.octora.com/" title="Bot home page [new window]" target="_blank">Octora Beta Bot</a>',
-'omniexplorer[_+ ]bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
-'onet\.pl[_+ ]sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
+'omniexplorer[_+\s]bot','<a href="http://www.omni-explorer.com/" title="Bot home page. [new window]" target="_blank">OmniExplorer Bot</a>',
+'onet\.pl[_+\s]sa','<a href="http://szukaj.onet.pl/" title="Onet.pl_SA home page [new window]" target="_blank">Onet.pl_SA</a>',
'onfolio','<a href="http://www.onfolio.com/" title="Bot home page [new window]">Onfolio</a>',
'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
'openwebspider','<a href="http://www.openwebspider.org/" title="OpenWebSpider home page [new window]" target="_blank">OpenWebSpider</a>',
'pear_http_request_class','<a href="http://pear.php.net/" title="PEAR HTTP Request class home page [new window]" target="_blank">PEAR HTTP Request class</a>',
'peerbot','<a href="http://www.peerbot.com/" title="PEERbot home page [new window]" target="_blank">PEERbot</a>',
'perman', 'Perman surfer',
-'php[_+ ]version[_+ ]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP Version Tracker home page [new window]" target="_blank">PHP version tracker</a>',
+'php[_+\s]version[_+\s]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP Version Tracker home page [new window]" target="_blank">PHP version tracker</a>',
'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page [new window]" target="_blank">PictureOfInternet</a>',
'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page. [new window]" target="_blank">ping.blo.gs</a>',
'plinki','<a href="http://www.plinki.com/" title="plinki home page [new window]" target="_blank">plinki</a>',
'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page [new window]" target="_blank">SBIder</a>',
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',
'scumbot','Scumbot',
-'searchguild[_+ ]dmoz[_+ ]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
+'searchguild[_+\s]dmoz[_+\s]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
'searchmetricsbot','<a href="http://www.searchmetrics.com/en/searchmetrics-bot/" rel="nofollow" title="SearchmetricsBot [new window]" target="_blank">SearchmetricsBot</a>',
'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
'semrushbot', '<a href="http://www.semrush.com/bot.html" rel="nofollow" title="SemrushBot [new window]" target="_blank">SemrushBot</a>',
'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page [new window]" target="_blank">VORTEX</a>',
'vse\/','<a href="http://www.vivisimo.com/" title="VSE home page [new window]" target="_blank">VSE</a>',
'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page [new window]" target="_blank">W3C Link Checker</a>',
-'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
+'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page [new window]" target="_blank">W3C jigsaw CSS Validator</a>',
'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
'watchmouse', '<a href="http://www.watchmouse.com/en/" title="WatcMouse">WatchMouse Website Monitor</a>',
'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',
'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page [new window]" target="_blank">WebFilter</a>',
'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page [new window]" target="_blank">WebIndexer</a>',
'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page [new window]" target="_blank">WebMiner</a>',
-'website[_+ ]monitoring[_+ ]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
+'website[_+\s]monitoring[_+\s]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page [new window]" target="_blank">Website_Monitoring_Bot</a>',
'webvulncrawl', 'WebVulnCrawl',
'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b " title="Wells Search home page [new window]" target="_blank">Wells Search</a>',
'wesee:search', '<a href="http://www.wesee.com/en/support/bot/" title="WeSEE Bot Home Page (gave a 404-Error on Nov. 2, 2013) [new window]" target="_blank">WeSEE Bot</a>',
'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
'^motorola$', 'Suspected Bot masquerading as "Motorola"',
'movabletype', 'movabletype',
-'^mozilla\/3\.0 \(compatible$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/3\.0\s\(compatible$', 'Suspected bot masqurading as Mozilla',
'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/4\.0 \(compatible;\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0\s\(compatible;\)$', 'Suspected bot masqurading as Mozilla',
'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 \(compatible;$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 \(en\-us\)$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 firefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(compatible;$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\s\(en\-us\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
'^msie', 'Suspected bot masquerading as M$ IE',
'netnewswire', 'netnewswire',
' netseer ', '<a href="http://www.netseer.com/crawler.html">Net Seer</a>',
'fast\-webcrawler'=>'AllTheWeb',
'googlebot'=>'Google',
'google\-sitemap'=>'Google',
-'google[_+ ]web[_+ ]preview'=>'Google',
+'google[_+\s]web[_+\s]preview'=>'Google',
'msnbot'=>'MSN',
'nutch'=>'Looksmart',
'scooter'=>'AltaVista',