# Other robots reported by users
'1\-more_scanner',
'360spider',
+'a6-indexer',
'accoona\-ai\-agent',
'activebookmark',
'adamm_bot',
'bubing',
'bumblebee',
'candlelight[_+ ]favorites[_+ ]inspector',
+'careerbot',
'cbn00glebot',
'cerberian_drtrs',
'cfnetwork',
'computer_and_automation_research_institute_crawler',
'converamultimediacrawler',
'converacrawler',
+'copubbot',
'cscrawler',
'cse_html_validator_lite_online',
'cuasarbot',
'html[_+ ]link[_+ ]validator',
'httrack',
'hundesuche\.com\-bot',
+'i-bot',
'ichiro',
'iltrovatore\-setaccio',
'infobot',
'mediapartners\-google',
'megite',
'metaspinner',
+'miadev',
+'microsoft bits',
'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
'microsoft[_+ ]url[_+ ]control',
'mini\-reptile',
'schizozilla',
'scumbot',
'searchguild[_+ ]dmoz[_+ ]experiment',
+'searchmetricsbot',
'seekbot',
+'semrushbot',
'sensis_web_crawler',
+'seokicks\.de',
'seznambot',
'shim\-crawler',
'shoutcast',
'sohu\-search',
'sohu', # "sohu agent"
'snappy',
+'spbot',
'sphere_scout',
'spiderlytics',
'spip',
'w3c_validator',
'watchmouse',
'wavefire',
+'waybackarchive\.org',
'webclipping\.com',
'webcompass',
'webcrawl\.net',
# Other robots reported by users
'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',
'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead [new window]" target="_blank">360spider</a>',
+'a6-indexer', '<a href="http://www.a6corp.com/a6-web-scraping-policy/" rel="nofollow" title="A6-Indexer [new window]" target="_blank">A6-Indexer</a>',
'accoona\-ai\-agent','<a href="http://www.accoona.com/" title="Accoona-AI-Agent home page [new window]" target="_blank">Accoona-AI-Agent</a>',
'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" title="ActiveBookmark home page [new window]" target="_blank">ActiveBookmark</a>',
'adamm_bot','<a href="http://home.blic.net/adamm/" title="Bot home page [new window]" target="_blank">AdamM Bot</a>',
'bubing', '<a href="http://law.di.unimi.it/BUbiNG.html" title="BUbiNG [new window]" target="_blank">BUbiNG</a>',
'bumblebee', 'Bumblebee (relevare.com)',
'candlelight[_+ ]favorites[_+ ]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page [new window]" target="_blank">Candlelight_Favorites_Inspector</a>',
+'careerbot', '<a href="http://www.career-x.de/bot.html" rel="nofollow" title="CareerBot home page [new window]" target="_blank">CareerBot</a>',
'cbn00glebot','cbn00glebot',
'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page [new window]" target="_blank">Cerberian Drtrs</a>',
'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page [new window]" target="_blank">CFNetwork</a>',
'computer_and_automation_research_institute_crawler','<a href="http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html" title="Computer and Automation Research Institute Crawler home page [new window]" target="_blank">Computer and Automation Research Institute Crawler</a>',
'converamultimediacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraMultiMediaCrawler home page [new window]" target="_blank">ConveraMultiMediaCrawler</a>',
'converacrawler','<a href="http://www.authoritativeweb.com/crawl/" title="ConveraCrawler home page [new window]" target="_blank">ConveraCrawler</a>',
+'copubbot', '<a href="http://www.copub.com/bot.php" rel="nofollow" title="CoPubbot Home Page [new window] Note: Access to bot home page gave a 404 error on Dec 21, 2013" target="_blank">CoPubbot</a>',
'cscrawler','CsCrawler',
'cse_html_validator_lite_online','<a href="http://online.htmlvalidator.com/php/onlinevallite.php" title="CSE HTML Validator Lite Online home page [new window]" target="_blank">CSE HTML Validator Lite Online</a>','cuasarbot','<a href="http://www.cuasar.com/" title="Cuasarbot home page [new window]" target="_blank">Cuasarbot</a>',
'cursor','<a href="http://adcenter.hu/docs/en/bot.html " title="Cursor home page [new window]" target="_blank">Cursor</a>',
'html[_+ ]link[_+ ]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page [new window]" target="_blank">Html_Link_Validator</a>',
'httrack','<a href="http://www.httrack.com/" title="Bot home page [new window]" target="_blank">HTTrack off-line browser</a>',
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com/" title="Hundesuche.com-Bot home page [new window]" target="_blank">Hundesuche.com-Bot</a>',
+'i-bot','i-bot',
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page [new window]" target="_blank">ichiro</a>',
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',
'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',
'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
'metager\-linkchecker','MetaGer LinkChecker',
'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
+'miadev', '<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider [new window]" target="_blank">MiaDev spider</a>',
+'microsoft bits', '<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)? [new window]" target="_blank">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',
'microsoft[_+ ]url[_+ ]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page [new window]" target="_blank">Schizozilla</a>',
'scumbot','Scumbot',
'searchguild[_+ ]dmoz[_+ ]experiment','<a href="http://www.searchguild.com/" title="SearchGuild_DMOZ_Experiment home page [new window]" target="_blank">SearchGuild_DMOZ_Experiment</a>',
+'searchmetricsbot','<a href="http://www.searchmetrics.com/en/searchmetrics-bot/" rel="nofollow" title="SearchmetricsBot [new window]" target="_blank">SearchmetricsBot</a>',
'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
+'semrushbot', '<a href="http://www.semrush.com/bot.html" rel="nofollow" title="SemrushBot [new window]" target="_blank">SemrushBot</a>',
'sensis_web_crawler','<a href="http://www.sensis.com.au/" title="Sensis Web Crawler home page [new window]" target="_blank">Sensis Web Crawler</a>',
+'seokicks\.de', '<a href="http://www.seokicks.de/robot.html" rel="nofollow" title="SEOkicks Webcrawler home page [new window]" target="_blank">SEOkicks Webcrawler</a>',
'seznambot','<a href="http://fulltext.seznam.cz/" title="Bot home page [new window]" target="_blank">SeznamBot</a>',
'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page [new window]" target="_blank">Shim-Crawler</a>',
'shoutcast','Shoutcast Directory Service',
'sohu\-search','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu-search</a>',
'sohu','<a href="http://corp.sohu.com/" title="Bot home page [new window]" target="_blank">sohu agent</a>',
'snappy','<a href="http://www.urltrends.com/faq.php" title="Bot home page [new window]" target="_blank">Snappy</a>',
+'spbot', '<a href="http://www.seoprofiler.com/bot" rel="nofollow" title="SEOprofiler Bot [new window]" target="_blank">SEOprofiler Bot</a>',
'sphere_scout','<a href="http://www.sphere.com/" title="Bot home page [new window]" target="_blank">Sphere Scout</a>',
'spip','<a href="http://www.spip.net" title="SPIP home page [new window]" target="_blank">SPIP</a>',
'sproose_crawler','<a href="http://www.sproose.com/bot.html" title="Bot home page [new window]" target="_blank">sproose crawler</a>',
'w3c_validator','<a href="http://validator.w3.org/" title="Bot home page [new window]" target="_blank">W3C Validator</a>',
'watchmouse', '<a href="http://www.watchmouse.com/en/" title="WatcMouse">WatchMouse Website Monitor</a>',
'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page [new window]" target="_blank">Wavefire</a>',
+'waybackarchive\.org', '<span title="Maybe related to spiderlytics.">No website, email: spider(at)waybackarchive.org</span>',
+# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
+# Probably not related to the Wayback Machine of archive.org.
'webclipping\.com', 'WebClipping.com',
'webcompass', 'webcompass',
'webcrawl\.net','<a href="http://www.webcrawl.net/" title="webcrawl.net home page [new window]" target="_blank">webcrawl.net</a>',
'scanner', 'Unknown robot (identified by \'scanner\')',
'spider', 'Unknown robot (identified by \'spider\')',
'sucker', 'Unknown robot (identified by \'sucker\')',
-'bot[\s_+:,\.\;\/\\\-]','Unknown robot (identified by \'bot*\')',
-'[\s_+:,\.\;\/\\\-]bot','Unknown robot (identified by \'*bot\')',
+'bot[\s_+:,\.\;\/\\\-]','Unknown robot (identified by \'bot\' followed by a space or one of the following characters _+:,.;/\-)',
+'[\s_+:,\.\;\/\\\-]bot','Unknown robot (identified by \'bot\' preceded by a space or one of the following characters _+:,.;/\-)',
'curl', 'Common *nix tool for automating web document retireval. Most likely a bot.',
'php', 'A PHP script',
'ruby\/', 'Ruby script',
'googlee\.',
'googlecom\.com',
'goggle\.co\.hu',
+'216\.239\.32\.20',
'216\.239\.(35|37|39|51)\.100',
'216\.239\.(35|37|39|51)\.101',
'216\.239\.5[0-9]\.104',
'duckduckgo\.com',
'sr\.facemoods\.com',
'shoppstop\.com',
+'searchya\.com',
+'picsearch\.de',
+'webssearches\.com',
+'zapmeta\.de',
+'localmoxie\.com',
# Chello Portals
'chello\.at',
'chello\.be',
'sumaja\.de',
'navigationshilfe\.t-online\.de',
'umfis\.de',
+'fastbot\.de',
+'tixuma\.de',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu',
'googlee\.','google',
'googlecom\.com','google',
'goggle\.co\.hu','google',
+'216\.239\.32\.20', 'google',
'216\.239\.(35|37|39|51)\.100','google_cache',
'216\.239\.(35|37|39|51)\.101','google_cache',
'216\.239\.5[0-9]\.104','google_cache',
'duckduckgo\.com', 'duckduckgo',
'sr\.facemoods\.com', 'facemoods',
'shoppstop\.com', 'shoppstop',
+'searchya\.com', 'searchya',
+'picsearch\.de', 'picsearch',
+'webssearches\.com', 'webssearches',
+'zapmeta\.de', 'zapmeta',
+'localmoxie\.com', 'localmoxie',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
'sumaja\.de', 'sumaja',
'navigationshilfe\.t-online\.de', 'navigationshilfe',
'umfis\.de', 'umfis',
+'fastbot\.de', 'fastbot_de',
+'tixuma\.de', 'tixuma_de',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
'duckduckgo', 'uddg=',
'facemoods', 'q=',
'shoppstop', 'keywords=',
+'searchya', 'q=',
+'picsearch', 'q=',
+'webssearches', 'q=',
+'zapmeta', 'query=',
+'localmoxie', 'keyword=',
# Chello Portals
'chelloat','q1=',
'chellobe','q1=',
#'sumaja', 'no query string available', #There is no query string in the referrer url
'navigationshilfe', 'q=',
'umfis', 'suchbegriff=',
+'fastbot_de', 'red=[0-9]*\+',
+'tixuma_de', 'sc=',
# Minor Hungarian search engines
'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
'keresolap_hu','q=',
'duckduckgo', '<a href="http://r.duckduckgo.com/" rel="nofollow" title="DuckDuckGo Home Page [new window]" target="_blank">DuckDuckGo (Does not provide search keyphrases, using found page instead)</a>',
'facemoods', '<a href="http://sr.facemoods.com/" title="Facemoods Search [new window]" target="_blank">Facemoods Search</a>',
'shoppstop', '<a href="http://www.shoppstop.com/" title="ShoppStop [new window]" target="_blank">ShoppStop</a>',
+'searchya', '<a href="http://www.searchya.com/" title="Searchya [new window]" target="_blank">Searchya</a>',
+'picsearch', '<a href="http://www.picsearch.de/" title="picsearch [new window]" target="_blank">picsearch</a>',
+'webssearches', '<a href="http://www.webssearches.com/" title="Web Searches [new window]" target="_blank">Web Searches</a>',
+'zapmeta', '<a href="http://www.zapmeta.de/" title="ZapMeta [new window]" target="_blank">ZapMeta</a>',
+'localmoxie', '<a href="http://www.localmoxie.com/" title="Local Moxie [new window]" target="_blank">Local Moxie</a>',
# Chello Portals
'chelloat','<a href="http://www.chello.at/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',
'chellobe','<a href="http://www.chello.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',
'sumaja', '<a href="http://www.sumaja.de/" rel="nofollow" title="Sumaja [new window]" target="_blank">Sumaja</a>',
'navigationshilfe', '<a href="http://navigationshilfe.t-online.de/" rel="nofollow" title="T-Online Navigationshilfe [new window]" target="_blank">T-Online Navigationshilfe</a>',
'umfis', '<a href="http://www.umfis.de/" title="UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland [new window]" target="_blank">UMFIS-Online Das Umweltfirmen-Informationssystem der IHKs in Deutschland</a>',
+'fastbot_de', '<a href="http://fastbot.de/" title="Fastbot.de [new window]" target="_blank">Fastbot.de (Does not provide search keyphrases, using found page instead)</a>',
+'tixuma_de', '<a href="http://www.tixuma.de/" title="Tixuma Deutschland [new window]" target="_blank">Tixuma Deutschland</a>',
# Minor Hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
'tango_hu','<a href="http://tango.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',