# updated AskJeeves to Ask
# 2012-06-05 Albrecht Mueller
# added Grabber from SDSC (San Diego Supercomputer Center).
+# 2013-09-30 Albrecht Mueller
+# AWStats probably cannot detect the extrabot.com bot (sample log lines below), as it
+# identifies itself in the referrer field and not in the user agent string.
+#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
# to do MS Search 4.0 Robot
'abonti\.com',
'acme\.spider',
'ahoythehomepagefinder',
+'ahrefsbot',
'alkaline',
'anthill',
'arachnophilia',
'xget',
# Other robots reported by users
'1\-more_scanner',
+'360spider',
'accoona\-ai\-agent',
'activebookmark',
'adamm_bot',
'aspseek',
'asterias',
'awbot',
+'backlinktest\.com',
'baiduspider',
'becomebot',
'bender',
'cursor',
'custo',
'datafountains\/dmoz_downloader',
+'dataprovider\.com',
'daviesbot',
'daypopbot',
'deepindex',
'iltrovatore\-setaccio',
'infobot',
'infociousbot',
+'infohelfer',
'infomine',
'insurancobot',
+'integromedb\.org',
'internet[_+ ]ninja',
'internetarchive',
'internetseer',
'ips\-agent',
'irlbot',
'isearch2006',
+'istellabot',
'iupui_research_bot',
'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
'justview',
'mediapartners\-google',
'megite',
'metaspinner',
+'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
'microsoft[_+ ]url[_+ ]control',
'mini\-reptile',
'minirank',
'msrabot',
'msrbot',
'mt::telegraph::agent',
+'mydoyouhike',
'nagios',
'nasa_search',
-'mydoyouhike',
+'netestate ne crawler',
'netluchs',
'netsprint',
'newsgatoronline',
'sohu', # "sohu agent"
'snappy',
'sphere_scout',
+'spiderlytics',
'spip',
'sproose_crawler',
+'ssearch_bot',
'steeler',
'steroid__download',
'suchfin\-bot',
'yandex',
'flexum',
'yanga',
+'yet-another-spider',
'yooglifetchagent',
'z\-add_link_checker',
'zealbot',
'yourls',
'zemanta',
'zend_http_client',
+'zumbot',
# Other id that are 99% of robots
'wget',
'libwww',
'abonti\.com','<a href="http://www.abonti.com/" title="Abonti WebSearch [new window]" target="_blank">Abonti WebSearch</a>',
'acme\.spider','Acme.Spider',
'ahoythehomepagefinder','Ahoy! The Homepage Finder',
+'ahrefsbot', '<a href="http://ahrefs.com/robot/" title="Bot home page [new window]" target="_blank">AhrefsBot</a>',
'alkaline','Alkaline',
'anthill','Anthill',
'arachnophilia','Arachnophilia',
'spider[_+ ]monkey','Spider monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
+'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
'spiderman','<a href="http://www.iscrawling.com" title="Spiderman home page [new window]" target="_blank">Spiderman</a>',
'spiderview','SpiderView(tm)',
'spry','Spry Wizard Robot',
'xget','XGET',
# Other robots reported by users
'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" title="1-More Scanner home page [new window]" target="_blank">1-More Scanner</a>',
+'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead [new window]" target="_blank">360spider</a>',
'accoona\-ai\-agent','<a href="http://www.accoona.com/" title="Accoona-AI-Agent home page [new window]" target="_blank">Accoona-AI-Agent</a>',
'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" title="ActiveBookmark home page [new window]" target="_blank">ActiveBookmark</a>',
'adamm_bot','<a href="http://home.blic.net/adamm/" title="Bot home page [new window]" target="_blank">AdamM Bot</a>',
'aspseek','<a href="http://www.aspseek.org/" title="Bot home page [new window]" target="_blank">ASPseek</a>',
'asterias', 'Asterias',
'awbot', 'AWBot',
+'backlinktest\.com', '<a href="http://www.backlinktest.com/crawler.html" title="BacklinkCrawler [new window]" target="_blank">BacklinkCrawler</a>',
'baiduspider','<a href="http://www.baidu.com/search/spider.html" title="Bot home page [new window]" target="_blank">BaiDuSpider</a>',
'becomebot', '<a href="http://www.become.com/site_owners.html" title="Bot home page [new window]" target="_blank">BecomeBot</a>',
'bender','<a href="http://bender.ucr.edu/" title="Bot home page [new window]" target="_blank">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page [new window]" target="_blank">focused_crawler</a>',
'cursor','<a href="http://adcenter.hu/docs/en/bot.html " title="Cursor home page [new window]" target="_blank">Cursor</a>',
'custo','<a href="http://www.netwu.com/custo/" title="Custo home page [new window]" target="_blank">Custo</a>',
'datafountains\/dmoz_downloader','<a href="http://infomine.ucr.edu/ " title="DataFountains/DMOZ Downloader home page [new window]" target="_blank">DataFountains/DMOZ Downloader</a>',
+'dataprovider\.com', '<a href="http://www.dataprovider.com/" title="Dataprovider Site Explorer [new window]" target="_blank">Dataprovider Site Explorer</a>',
'daviesbot', 'DaviesBot',
'daypopbot', 'DayPop',
'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page [new window]" target="_blank">Deepindex</a>',
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page [new window]" target="_blank">IlTrovatore-Setaccio</a>',
'infobot','<a href="http://www.infobot.org/" title="InfoBot home page [new window]" target="_blank">InfoBot</a>',
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page [new window]" target="_blank">InfociousBot</a>',
+'infohelfer','<a href="http://www.infohelfer.de/crawler.php" title="Infohelfer home page [new window]" target="_blank">Infohelfer</a>',
'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page [new window]" target="_blank">INFOMINE VLCrawler</a>',
'insurancobot','<a href="http://www.fastspywareremoval.com/" title="InsurancoBot home page [new window]" target="_blank">InsurancoBot</a>',
+'integromedb\.org','<a href="http://www.integromedb.org/Crawler" title="IntegromeDB home page [new window]" target="_blank">IntegromeDB</a>',
'internet[_+ ]ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page [new window]" target="_blank">Internet_Ninja </a>',
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page [new window]" target="_blank">InternetArchive</a>',
'internetseer', 'InternetSeer',
'internetsupervision','<a href="http://internetsupervision.com/" title="InternetSupervision home page [new window]" target="_blank">InternetSupervision</a>',
'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page [new window]" target="_blank">IRLbot</a>',
'isearch2006','<a href="http://www.yahoo.com.cn/" title="isearch2006 home page [new window]" target="_blank">isearch2006</a>',
+'istellabot', '<a href="http://www.tiscali.it/" title="IstellaBot [new window]" target="_blank">IstellaBot</a>',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page [new window]" target="_blank">IUPUI_Research_Bot</a>',
'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page [new window]" target="_blank">JRTwine_Software_Check_Favorites_Utility</a>',
'justview', 'JustView',
'megite','<a href="http://www.megite.com/" title="Megite home page [new window]" target="_blank">Megite</a>',
'metager\-linkchecker','MetaGer LinkChecker',
'metaspinner','<a href="http://index.meta-spinner.de/" title="Metaspinner home page [new window]" target="_blank">Metaspinner</a>',
+'microsoft.*discovery', '<a href="http://support.microsoft.com/kb/838028/en-us" title="Microsoft KB838028 [new window]" target="_blank">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" title="Description of the Microsoft Office Existence Discovery [new window]" target="_blank">Microsoft Office Existence Discovery</a>',
'microsoft[_+ ]url[_+ ]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page [new window]" target="_blank">Microsoft URL Control</a>',
'minirank','<a href="http://minirank.com/" title="miniRank home page [new window]" target="_blank">miniRank</a>',
'mini\-reptile','Mini-reptile',
'mydoyouhike','<a href="http://www.doyouhike.net/my" title="Mydoyouhike home page [new window]" target="_blank">Mydoyouhike</a>',
'nagios','Nagios',
'nasa_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b" title="NASA Search home page [new window]" target="_blank">NASA Search</a>',
+'netestate ne crawler','<a href="http://www.website-datenbank.de/" title="Website-Datenbank home page [new window]" target="_blank">Website-Datenbank</a>',
'netluchs','<a href="http://www.netluchs.de/" title="Bot home page. [new window]" target="_blank">Netluchs</a>',
'netsprint','<a href="http://www.netsprint.pl/serwis/" title="NetSprint home page [new window]" target="_blank">NetSprint</a>',
'newsgatoronline', 'NewsGator Online',
'sphere_scout','<a href="http://www.sphere.com/" title="Bot home page [new window]" target="_blank">Sphere Scout</a>',
'spip','<a href="http://www.spip.net" title="SPIP home page [new window]" target="_blank">SPIP</a>',
'sproose_crawler','<a href="http://www.sproose.com/bot.html" title="Bot home page [new window]" target="_blank">sproose crawler</a>',
+'ssearch_bot', '<a href="http://www.semantissimo.de/" title="sSearch Crawler [new window]" target="_blank">sSearch Crawler</a>',
'steroid__download','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" title="STEROID Download home page [new window]" target="_blank">STEROID Download</a>',
'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ " title="Steeler home page [new window]" target="_blank">Steeler</a>',
'suchfin\-bot','<a href="http://www.suchfin.de/" title="Suchfin-Bot home page [new window]" target="_blank">Suchfin-Bot</a>',
'yandex', 'Yandex bot',
'flexum', 'Flexum Search Engine',
'yanga', 'Yanga WorldSearch Bot',
+'yet-another-spider','<a href="http://188.40.112.195/" title="Yet-Another-Spider home page [new window]" target="_blank">Yet-Another-Spider</a>',
'yooglifetchagent','<a href="http://www.yoogli.com/" title="yoogliFetchAgent home page [new window]" target="_blank">yoogliFetchAgent</a>',
'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page [new window]" target="_blank">Z-Add Link Checker</a>',
'zealbot','ZealBot',
'zhuaxia','<a href="http://www.zhuaxia.com/" target="_blank">ZhuaXia</a>',
'zspider','<a href="http://feedback.redkolibri.com/" title="Bot home page [new window]" target="_blank">zspider</a>',
'zeus','<a href="http://www.webmasterworld.com/forum11/1840.htm" title="Bot documentation [new window]" target="_blank">Zeus Webster Pro</a>',
+'zumbot','<a href="http://help.zum.com/inquiry" title="ZumBot home page [new window]" target="_blank">ZumBot</a>',
'ng\/1\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 1.x (Exalead)</a>', # put at end to avoid false positive
'ng\/2\.','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">NG 2.x (Exalead)</a>', # put at end to avoid false positive
'exabot','<a href="http://www.exabot.com/" title="Bot home page [new window]" target="_blank">Exabot</a>', # put at end to avoid false positive
'postrank', 'postrank',
'printfulbot', 'printfulbot',
'protopage', 'protopage',
-'proximic', 'proximic',
+'proximic', '<a href="http://www.proximic.com/info/spider.php" title="Proximic Spider home page [new window]" target="_blank">Proximic Spider</a>',
'quipply', 'quipply',
'r6\_', '<a href="http://www.radian6.com/crawler">Radian 6 Crawler</a>',
'ratingburner', 'ratingburner',
'search\.searchcompletion\.com',
'en\.eazel\.com',
'sr\.searchfunmoods\.com',
+'173\.194\.35\.177',
+'dalesearch\.com',
+'sweetpacks-search\.com',
+'searchgol\.com',
# Chello Portals
'chello\.at',
'chello\.be',
'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
'search\.fbdownloader\.com',
'search\.babylon\.com',
+'my\.allgameshome\.com',
# Minor finnish search engines
'haku\.www\.fi',
# Minor french search engines
'netluchs\.de',
'schoenerbrausen\.de',
'suche\.gmx\.net',
+'suche\.gmx\.at',
'ecosia\.org',
'de\.aolsearch\.com',
'suche\.aol\.de',
'www\.metager\.de',
'search\.1und1\.de',
'sm\.de',
+'sumaja\.de',
+'navigationshilfe\.t-online\.de',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu',
'search\.searchcompletion\.com', 'searchcompletion',
'en\.eazel\.com','eazelsearch',
'sr\.searchfunmoods\.com', 'searchfunmoods',
+'173\.194\.35\.177', 'googleByIP',
+'dalesearch\.com', 'dalesearch',
+'sweetpacks-search\.com', 'sweetpacks',
+'searchgol\.com', 'searchgol',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
'searchy\.co\.uk','searchy',
'search\.fbdownloader\.com','fbdownloader',
'search\.babylon\.com', 'babylon',
+'my\.allgameshome\.com', 'allgameshome',
# Minor finnish search engines
'haku\.www\.fi','haku',
# Minor french search engines
'netluchs\.de','netluchs',
'schoenerbrausen\.de','schoenerbrausen',
'suche\.gmx\.net', 'gmxsuche',
+'suche\.gmx\.at', 'gmxsuche_at',
'ecosia\.org', 'ecosiasearch',
'de\.aolsearch\.com', 'aolsearch',
'suche\.aol\.de', 'aolsuche',
'www\.metager\.de', 'metager',
'search\.1und1\.de', 'search_1und1_de',
'sm\.de', 'smde',
+'sumaja\.de', 'sumaja',
+'navigationshilfe\.t-online\.de', 'navigationshilfe',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
'searchcompletion', 'q=',
'eazelsearch', 'q=',
'searchfunmoods', 'q=',
+'googleByIP', 'q=',
+'dalesearch', 'q=',
+'sweetpacks', 'q=',
+'searchgol', 'q=',
# Chello Portals
'chelloat','q1=',
'chellobe','q1=',
'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
'fbdownloader','q=',
'babylon','q=',
+'allgameshome', 's=',
# Minor finnish search engines
'haku','w=',
# Minor french search engines
'netluchs','query=',
'schoenerbrausen','q=',
'gmxsuche', 'q=',
+'gmxsuche_at', 'q=',
'ecosiasearch', 'q=',
'aolsearch', 'q=',
'aolsuche', 'q=',
'metager', 'eingabe=',
'search_1und1_de', 'q=',
'smde', 'q=',
+#'sumaja', 'no query string available', # Disabled: there is no query string in the referrer URL
+'navigationshilfe', 'q=',
# Minor Hungarian search engines
'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
'keresolap_hu','q=',
'searchcompletion', '<a href="http://search.searchcompletion.com/" rel="nofollow" title="SearchCompletion Search [new window]" target="_blank">SearchCompletion Search</a>',
'eazelsearch', '<a href="http://en.eazel.com/" rel="nofollow" title="Eazel Search [new window]" target="_blank">Eazel Search</a>',
'searchfunmoods', '<a href="http://sr.searchfunmoods.com/" rel="nofollow" title="Funmoods [new window]" target="_blank">Funmoods</a>',
+'googleByIP', '<a href="http://173.194.35.177/" rel="nofollow" title="Google (Access by IP-Address) [new window]" target="_blank">Google (Access by IP-Address)</a>',
+'dalesearch', '<a href="http://www.dalesearch.com/" rel="nofollow" title="Dale Search [new window]" target="_blank">Dale Search</a>',
+'sweetpacks', '<a href="http://www.sweetpacks-search.com/" rel="nofollow" title="Sweetpacks [new window]" target="_blank">Sweetpacks</a>',
+'searchgol', '<a href="http://www.searchgol.com/" rel="nofollow" title="Search-Gol [new window]" target="_blank">Search-Gol</a>',
# Chello Portals
'chelloat','<a href="http://www.chello.at/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Austria</a>',
'chellobe','<a href="http://www.chello.be/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Chello Belgium</a>',
'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
'fbdownloader','<a href="http://search.fbdownloader.com/" rel="nofollow" title="FBDownloader Home Page [new window]" target="_blank">FBDownloader</a>',
'babylon','<a href="http://search.babylon.com/" rel="nofollow" title="Babylon Home Page [new window]" target="_blank">Babylon</a>',
+'allgameshome', '<a href="http://my.allgameshome.com/" rel="nofollow" title="AllGamesHome [new window]" target="_blank">AllGamesHome</a>',
# Minor finnish search engines
'haku','Ihmemaa',
# Minor french search engines
'netluchs','<a href="http://www.netluchs.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Netluchs</a>',
'schoenerbrausen','<a href="http://www.schoenerbrausen.de/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Schoenerbrausen/</a>',
'gmxsuche', '<a href="http://suche.gmx.net/" rel="nofollow" title="GMX Suche Home Page [new window]" target="_blank">GMX Suche</a>',
+'gmxsuche_at', '<a href="http://suche.gmx.at/" rel="nofollow" title="GMX Suche Österreich Home Page [new window]" target="_blank">GMX Suche Österreich</a>',
'ecosiasearch', '<a href="http://ecosia.org" rel="nofollow" title="Ecosia Search Home Page [new window]" target="_blank">Ecosia Search</a>',
'aolsearch', '<a href="http://de.aolsearch.com/" rel="nofollow" title="AOL Search Home Page [new window]" target="_blank">AOL Search</a>',
'aolsuche', '<a href="http://suche.aol.de/" rel="nofollow" title="AOL Suche Home Page [new window]" target="_blank">AOL Suche</a>',
'metager', '<a href="http://www.metager.de" rel="nofollow" title="MetaGer Home Page [new window]" target="_blank">MetaGer</a>',
'search_1und1_de', '<a href="http://search.1und1.de/" rel="nofollow" title="1&1 Suche [new window]" target="_blank">1&1 Suche</a>',
'smde', '<a href="http://www.sm.de/" rel="nofollow" title="SM.de - Die SuchMaschine [new window]" target="_blank">SM.de - Die SuchMaschine</a>',
+'sumaja', '<a href="http://www.sumaja.de/" rel="nofollow" title="Sumaja [new window]" target="_blank">Sumaja</a>',
+'navigationshilfe', '<a href="http://navigationshilfe.t-online.de/" rel="nofollow" title="T-Online Navigationshilfe [new window]" target="_blank">T-Online Navigationshilfe</a>',
# Minor hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
'tango_hu','<a href="http://tango.hu/" rel="nofollow" title="Search Engine Home Page [new window]" target="_blank">Tango</a>',