From: eldy <> Date: Tue, 15 Oct 2013 21:34:40 +0000 (+0000) Subject: Update databases X-Git-Tag: AWSTATS_7_3~45 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f8dd732d43530aa2c5ae6be8db7ef62984a040f;p=thirdparty%2FAWStats.git Update databases --- diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm index 4b40f8f0..9b1ee0ee 100644 --- a/wwwroot/cgi-bin/lib/robots.pm +++ b/wwwroot/cgi-bin/lib/robots.pm @@ -308,6 +308,12 @@ # updated AskJeeves to Ask # 2012-06-05 Albrecht Mueller # added Grabber from SDSC (San Diego Supercomputer Center). +# 2013-09-30 Albrecht Mueller +# AWStats probably cannot detect the bot from extrabot.com (sample log lines below), as it identifies +# itself in the referrer field and not in the user agent string. +#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" +#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" +#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-" # to do MS Search 4.0 Robot @@ -392,6 +398,7 @@ 'abonti\.com', 'acme\.spider', 'ahoythehomepagefinder', +'ahrefsbot', 'alkaline', 'anthill', 'arachnophilia', @@ -661,6 +668,7 @@ 'xget', # Other robots reported by users '1\-more_scanner', +'360spider', 'accoona\-ai\-agent', 'activebookmark', 'adamm_bot', @@ -676,6 +684,7 @@ 'aspseek', 'asterias', 'awbot', +'backlinktest\.com', 'baiduspider', 'becomebot', 'bender', @@ -712,6 +721,7 @@ 'cursor', 'custo', 'datafountains\/dmoz_downloader', +'dataprovider\.com', 'daviesbot', 'daypopbot', 'deepindex', @@ -777,8 +787,10 @@ 'iltrovatore\-setaccio', 'infobot', 'infociousbot', +'infohelfer', 'infomine', 'insurancobot', 
+'integromedb\.org', 'internet[_+ ]ninja', 'internetarchive', 'internetseer', @@ -786,6 +798,7 @@ 'ips\-agent', 'irlbot', 'isearch2006', +'istellabot', 'iupui_research_bot', 'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility', 'justview', @@ -820,6 +833,7 @@ 'mediapartners\-google', 'megite', 'metaspinner', +'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery', 'microsoft[_+ ]url[_+ ]control', 'mini\-reptile', 'minirank', @@ -834,9 +848,10 @@ 'msrabot', 'msrbot', 'mt::telegraph::agent', +'mydoyouhike', 'nagios', 'nasa_search', -'mydoyouhike', +'netestate[_+ ]ne[_+ ]crawler', 'netluchs', 'netsprint', 'newsgatoronline', @@ -898,8 +913,10 @@ 'sohu', # "sohu agent" 'snappy', 'sphere_scout', +'spiderlytics', 'spip', 'sproose_crawler', +'ssearch_bot', 'steeler', 'steroid__download', 'suchfin\-bot', @@ -968,6 +985,7 @@ 'yandex', 'flexum', 'yanga', +'yet-another-spider', 'yooglifetchagent', 'z\-add_link_checker', 'zealbot', @@ -1084,6 +1102,7 @@ 'yourls', 'zemanta', 'zend_http_client', +'zumbot', # Other id that are 99% of robots 'wget', 'libwww', @@ -1168,6 +1187,7 @@ 'abonti\.com','Abonti WebSearch', 'acme\.spider','Acme.Spider', 'ahoythehomepagefinder','Ahoy! 
The Homepage Finder', +'ahrefsbot', 'AhrefsBot', 'alkaline','Alkaline', 'anthill','Anthill', 'arachnophilia','Arachnophilia', @@ -1380,6 +1400,7 @@ 'spider[_+ ]monkey','Spider monkey', 'spiderbot','SpiderBot', 'spiderline','Spiderline Crawler', +'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com', 'spiderman','Spiderman', 'spiderview','SpiderView(tm)', 'spry','Spry Wizard Robot', @@ -1447,6 +1468,7 @@ 'xget','XGET', # Other robots reported by users '1\-more_scanner','1-More Scanner', +'360spider','360spider', 'accoona\-ai\-agent','Accoona-AI-Agent', 'activebookmark','ActiveBookmark', 'adamm_bot','AdamM Bot', @@ -1462,6 +1484,7 @@ 'aspseek','ASPseek', 'asterias', 'Asterias', 'awbot', 'AWBot', +'backlinktest\.com', 'BacklinkCrawler', 'baiduspider','BaiDuSpider', 'becomebot', 'BecomeBot', 'bender','bender focused_crawler', @@ -1497,6 +1520,7 @@ 'cursor','Cursor', 'custo','Custo', 'datafountains\/dmoz_downloader','DataFountains/DMOZ Downloader', +'dataprovider\.com', 'Dataprovider Site Explorer', 'daviesbot', 'DaviesBot', 'daypopbot', 'DayPop', 'deepindex','Deepindex', @@ -1562,14 +1586,17 @@ 'iltrovatore\-setaccio','IlTrovatore-Setaccio', 'infobot','InfoBot', 'infociousbot','InfociousBot', +'infohelfer','Infohelfer', 'infomine','INFOMINE VLCrawler', 'insurancobot','InsurancoBot', +'integromedb\.org','IntegromeDB', 'internet[_+ ]ninja','Internet_Ninja ', 'internetarchive','InternetArchive', 'internetseer', 'InternetSeer', 'internetsupervision','InternetSupervision', 'irlbot','IRLbot', 'isearch2006','isearch2006', +'istellabot', 'IstellaBot', 'iupui_research_bot','IUPUI_Research_Bot', 'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','JRTwine_Software_Check_Favorites_Utility', 'justview', 'JustView', @@ -1601,6 +1628,7 @@ 'megite','Megite', 'metager\-linkchecker','MetaGer LinkChecker', 'metaspinner','Metaspinner', +'microsoft.*discovery', 'Microsoft Office Protocol Discovery/Microsoft Office Existence Discovery', 'microsoft[_+ 
]url[_+ ]control','Microsoft URL Control', 'minirank','miniRank', 'mini\-reptile','Mini-reptile', @@ -1618,6 +1646,7 @@ 'mydoyouhike','Mydoyouhike', 'nagios','Nagios', 'nasa_search','NASA Search', +'netestate[_+ ]ne[_+ ]crawler','Website-Datenbank', 'netluchs','Netluchs', 'netsprint','NetSprint', 'newsgatoronline', 'NewsGator Online', @@ -1682,6 +1711,7 @@ 'sphere_scout','Sphere Scout', 'spip','SPIP', 'sproose_crawler','sproose crawler', +'ssearch_bot', 'sSearch Crawler', 'steroid__download','STEROID Download', 'steeler','Steeler', 'suchfin\-bot','Suchfin-Bot', @@ -1750,12 +1780,14 @@ 'yandex', 'Yandex bot', 'flexum', 'Flexum Search Engine', 'yanga', 'Yanga WorldSearch Bot', +'yet-another-spider','Yet-Another-Spider', 'yooglifetchagent','yoogliFetchAgent', 'z\-add_link_checker','Z-Add Link Checker', 'zealbot','ZealBot', 'zhuaxia','ZhuaXia', 'zspider','zspider', 'zeus','Zeus Webster Pro', +'zumbot','ZumBot', 'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive 'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive 'exabot','Exabot', # put at end to avoid false positive @@ -1838,7 +1870,7 @@ 'postrank', 'postrank', 'printfulbot', 'printfulbot', 'protopage', 'protopage', -'proximic', 'proximic', +'proximic', 'Proximic Spider', 'quipply', 'quipply', 'r6\_', 'Radian 6 Crawler', 'ratingburner', 'ratingburner', diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index 7b5f34ad..480d7b81 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -313,6 +313,10 @@ 'search\.searchcompletion\.com', 'en\.eazel\.com', 'sr\.searchfunmoods\.com', +'173\.194\.35\.177', +'dalesearch\.com', +'sweetpacks-search\.com', +'searchgol\.com', # Chello Portals 'chello\.at', 'chello\.be', @@ -378,6 +382,7 @@ '(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', 
'search\.fbdownloader\.com', 'search\.babylon\.com', +'my\.allgameshome\.com', # Minor finnish search engines 'haku\.www\.fi', # Minor french search engines @@ -395,6 +400,7 @@ 'netluchs\.de', 'schoenerbrausen\.de', 'suche\.gmx\.net', +'suche\.gmx\.at', 'ecosia\.org', 'de\.aolsearch\.com', 'suche\.aol\.de', @@ -406,6 +412,8 @@ 'www\.metager\.de', 'search\.1und1\.de', 'sm\.de', +'sumaja\.de', +'navigationshilfe\.t-online\.de', # Minor Hungarian search engines 'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', 'tango\.hu', @@ -570,6 +578,10 @@ 'search\.searchcompletion\.com', 'searchcompletion', 'en\.eazel\.com','eazelsearch', 'sr\.searchfunmoods\.com', 'searchfunmoods', +'173\.194\.35\.177', 'googleByIP', +'dalesearch\.com', 'dalesearch', +'sweetpacks-search\.com', 'sweetpacks', +'searchgol\.com', 'searchgol', # Chello Portals 'chello\.at','chelloat', 'chello\.be','chellobe', @@ -661,6 +673,7 @@ 'searchy\.co\.uk','searchy', 'search\.fbdownloader\.com','fbdownloader', 'search\.babylon\.com', 'babylon', +'my\.allgameshome\.com', 'allgameshome', # Minor finnish search engines 'haku\.www\.fi','haku', # Minor french search engines @@ -691,6 +704,7 @@ 'netluchs\.de','netluchs', 'schoenerbrausen\.de','schoenerbrausen', 'suche\.gmx\.net', 'gmxsuche', +'suche\.gmx\.at', 'gmxsuche_at', 'ecosia\.org', 'ecosiasearch', 'de\.aolsearch\.com', 'aolsearch', 'suche\.aol\.de', 'aolsuche', @@ -702,6 +716,8 @@ 'www\.metager\.de', 'metager', 'search\.1und1\.de', 'search_1und1_de', 'sm\.de', 'smde', +'sumaja\.de', 'sumaja', +'navigationshilfe\.t-online\.de', 'navigationshilfe', # Minor Hungarian search engines 'heureka\.hu','heureka', 'vizsla\.origo\.hu','origo', @@ -879,6 +895,10 @@ 'searchcompletion', 'q=', 'eazelsearch', 'q=', 'searchfunmoods', 'q=', +'googleByIP', 'q=', +'dalesearch', 'q=', +'sweetpacks', 'q=', +'searchgol', 'q=', # Chello Portals 'chelloat','q1=', 'chellobe','q1=', @@ -945,6 +965,7 @@ 
'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', 'fbdownloader','q=', 'babylon','q=', +'allgameshome', 's=', # Minor finnish search engines 'haku','w=', # Minor french search engines @@ -963,6 +984,7 @@ 'netluchs','query=', 'schoenerbrausen','q=', 'gmxsuche', 'q=', +'gmxsuche_at', 'q=', 'ecosiasearch', 'q=', 'aolsearch', 'q=', 'aolsuche', 'q=', @@ -974,6 +996,8 @@ 'metager', 'eingabe=', 'search_1und1_de', 'q=', 'smde', 'q=', +#'sumaja', 'no query string available', #There is no query string in the referrer url +'navigationshilfe', 'q=', # Minor Hungarian search engines 'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', 'keresolap_hu','q=', @@ -1149,6 +1173,10 @@ 'searchcompletion', 'SearchCompletion Search', 'eazelsearch', 'Eazel Search', 'searchfunmoods', 'Funmoods', +'googleByIP', 'Google (Access by IP-Address)', +'dalesearch', 'Dale Search', +'sweetpacks', 'Sweetpacks', +'searchgol', 'Search-Gol', # Chello Portals 'chelloat','Chello Austria', 'chellobe','Chello Belgium', @@ -1216,6 +1244,7 @@ 'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', 'fbdownloader','FBDownloader', 'babylon','Babylon', +'allgameshome', 'AllGamesHome', # Minor finnish search engines 'haku','Ihmemaa', # Minor french search engines @@ -1235,6 +1264,7 @@ 'netluchs','Netluchs', 'schoenerbrausen','Schoenerbrausen/', 'gmxsuche', 'GMX Suche', +'gmxsuche_at', 'GMX Suche Österreich', 'ecosiasearch', 'Ecosia Search', 'aolsearch', 'AOL Search', 'aolsuche', 'AOL Suche', @@ -1246,6 +1276,8 @@ 'metager', 'MetaGer', 'search_1und1_de', '1&1 Suche', 'smde', 'SM.de - Die SuchMaschine', +'sumaja', 'Sumaja', +'navigationshilfe', 'T-Online Navigationshilfe', # Minor hungarian search engines 'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 
'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso', 'tango_hu','Tango',