From: eldy <>
Date: Tue, 15 Oct 2013 21:34:40 +0000 (+0000)
Subject: Update databases
X-Git-Tag: AWSTATS_7_3~45
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f8dd732d43530aa2c5ae6be8db7ef62984a040f;p=thirdparty%2FAWStats.git
Update databases
---
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm
index 4b40f8f0..9b1ee0ee 100644
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -308,6 +308,12 @@
# updated AskJeeves to Ask
# 2012-06-05 Albrecht Mueller
# added Grabber from SDSC (San Diego Supercomputer Center).
+# 2013-09-30 Albrecht Mueller
+# extrabot.com: AWStats probably cannot detect this bot, because it identifies
+# itself in the referrer field and not in the user agent string (sample log lines follow).
+#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
+#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
# to do MS Search 4.0 Robot
@@ -392,6 +398,7 @@
'abonti\.com',
'acme\.spider',
'ahoythehomepagefinder',
+'ahrefsbot',
'alkaline',
'anthill',
'arachnophilia',
@@ -661,6 +668,7 @@
'xget',
# Other robots reported by users
'1\-more_scanner',
+'360spider',
'accoona\-ai\-agent',
'activebookmark',
'adamm_bot',
@@ -676,6 +684,7 @@
'aspseek',
'asterias',
'awbot',
+'backlinktest\.com',
'baiduspider',
'becomebot',
'bender',
@@ -712,6 +721,7 @@
'cursor',
'custo',
'datafountains\/dmoz_downloader',
+'dataprovider\.com',
'daviesbot',
'daypopbot',
'deepindex',
@@ -777,8 +787,10 @@
'iltrovatore\-setaccio',
'infobot',
'infociousbot',
+'infohelfer',
'infomine',
'insurancobot',
+'integromedb\.org',
'internet[_+ ]ninja',
'internetarchive',
'internetseer',
@@ -786,6 +798,7 @@
'ips\-agent',
'irlbot',
'isearch2006',
+'istellabot',
'iupui_research_bot',
'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
'justview',
@@ -820,6 +833,7 @@
'mediapartners\-google',
'megite',
'metaspinner',
+'microsoft.*discovery', # = 'microsoft (?:office (?:protocol|existence)|data access internet publishing provider protocol) discovery',
'microsoft[_+ ]url[_+ ]control',
'mini\-reptile',
'minirank',
@@ -834,9 +848,10 @@
'msrabot',
'msrbot',
'mt::telegraph::agent',
+'mydoyouhike',
'nagios',
'nasa_search',
-'mydoyouhike',
+'netestate[_+ ]ne[_+ ]crawler', # "netEstate NE Crawler"; separator class as in the other multi-word IDs, since blanks in the agent may appear as "_" or "+"
'netluchs',
'netsprint',
'newsgatoronline',
@@ -898,8 +913,10 @@
'sohu', # "sohu agent"
'snappy',
'sphere_scout',
+'spiderlytics',
'spip',
'sproose_crawler',
+'ssearch_bot',
'steeler',
'steroid__download',
'suchfin\-bot',
@@ -968,6 +985,7 @@
'yandex',
'flexum',
'yanga',
+'yet-another-spider',
'yooglifetchagent',
'z\-add_link_checker',
'zealbot',
@@ -1084,6 +1102,7 @@
'yourls',
'zemanta',
'zend_http_client',
+'zumbot',
# Other id that are 99% of robots
'wget',
'libwww',
@@ -1168,6 +1187,7 @@
'abonti\.com','Abonti WebSearch',
'acme\.spider','Acme.Spider',
'ahoythehomepagefinder','Ahoy! The Homepage Finder',
+'ahrefsbot', 'AhrefsBot',
'alkaline','Alkaline',
'anthill','Anthill',
'arachnophilia','Arachnophilia',
@@ -1380,6 +1400,7 @@
'spider[_+ ]monkey','Spider monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
+'spiderlytics', 'Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
'spiderman','Spiderman',
'spiderview','SpiderView(tm)',
'spry','Spry Wizard Robot',
@@ -1447,6 +1468,7 @@
'xget','XGET',
# Other robots reported by users
'1\-more_scanner','1-More Scanner',
+'360spider','360spider',
'accoona\-ai\-agent','Accoona-AI-Agent',
'activebookmark','ActiveBookmark',
'adamm_bot','AdamM Bot',
@@ -1462,6 +1484,7 @@
'aspseek','ASPseek',
'asterias', 'Asterias',
'awbot', 'AWBot',
+'backlinktest\.com', 'BacklinkCrawler',
'baiduspider','BaiDuSpider',
'becomebot', 'BecomeBot',
'bender','bender focused_crawler',
@@ -1497,6 +1520,7 @@
'cursor','Cursor',
'custo','Custo',
'datafountains\/dmoz_downloader','DataFountains/DMOZ Downloader',
+'dataprovider\.com', 'Dataprovider Site Explorer',
'daviesbot', 'DaviesBot',
'daypopbot', 'DayPop',
'deepindex','Deepindex',
@@ -1562,14 +1586,17 @@
'iltrovatore\-setaccio','IlTrovatore-Setaccio',
'infobot','InfoBot',
'infociousbot','InfociousBot',
+'infohelfer','Infohelfer',
'infomine','INFOMINE VLCrawler',
'insurancobot','InsurancoBot',
+'integromedb\.org','IntegromeDB',
'internet[_+ ]ninja','Internet_Ninja ',
'internetarchive','InternetArchive',
'internetseer', 'InternetSeer',
'internetsupervision','InternetSupervision',
'irlbot','IRLbot',
'isearch2006','isearch2006',
+'istellabot', 'IstellaBot',
'iupui_research_bot','IUPUI_Research_Bot',
'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','JRTwine_Software_Check_Favorites_Utility',
'justview', 'JustView',
@@ -1601,6 +1628,7 @@
'megite','Megite',
'metager\-linkchecker','MetaGer LinkChecker',
'metaspinner','Metaspinner',
+'microsoft.*discovery', 'Microsoft Office Protocol Discovery/Microsoft Office Existence Discovery',
'microsoft[_+ ]url[_+ ]control','Microsoft URL Control',
'minirank','miniRank',
'mini\-reptile','Mini-reptile',
@@ -1618,6 +1646,7 @@
'mydoyouhike','Mydoyouhike',
'nagios','Nagios',
'nasa_search','NASA Search',
+'netestate[_+ ]ne[_+ ]crawler','Website-Datenbank', # key uses the [_+ ] separator class like the other multi-word IDs; it must be the same string as the robot ID pattern
'netluchs','Netluchs',
'netsprint','NetSprint',
'newsgatoronline', 'NewsGator Online',
@@ -1682,6 +1711,7 @@
'sphere_scout','Sphere Scout',
'spip','SPIP',
'sproose_crawler','sproose crawler',
+'ssearch_bot', 'sSearch Crawler',
'steroid__download','STEROID Download',
'steeler','Steeler',
'suchfin\-bot','Suchfin-Bot',
@@ -1750,12 +1780,14 @@
'yandex', 'Yandex bot',
'flexum', 'Flexum Search Engine',
'yanga', 'Yanga WorldSearch Bot',
+'yet-another-spider','Yet-Another-Spider',
'yooglifetchagent','yoogliFetchAgent',
'z\-add_link_checker','Z-Add Link Checker',
'zealbot','ZealBot',
'zhuaxia','ZhuaXia',
'zspider','zspider',
'zeus','Zeus Webster Pro',
+'zumbot','ZumBot',
'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive
'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive
'exabot','Exabot', # put at end to avoid false positive
@@ -1838,7 +1870,7 @@
'postrank', 'postrank',
'printfulbot', 'printfulbot',
'protopage', 'protopage',
-'proximic', 'proximic',
+'proximic', 'Proximic Spider',
'quipply', 'quipply',
'r6\_', 'Radian 6 Crawler',
'ratingburner', 'ratingburner',
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm
index 7b5f34ad..480d7b81 100644
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -313,6 +313,10 @@
'search\.searchcompletion\.com',
'en\.eazel\.com',
'sr\.searchfunmoods\.com',
+'173\.194\.35\.177',
+'dalesearch\.com',
+'sweetpacks-search\.com',
+'searchgol\.com',
# Chello Portals
'chello\.at',
'chello\.be',
@@ -378,6 +382,7 @@
'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk',
'search\.fbdownloader\.com',
'search\.babylon\.com',
+'my\.allgameshome\.com',
# Minor finnish search engines
'haku\.www\.fi',
# Minor french search engines
@@ -395,6 +400,7 @@
'netluchs\.de',
'schoenerbrausen\.de',
'suche\.gmx\.net',
+'suche\.gmx\.at',
'ecosia\.org',
'de\.aolsearch\.com',
'suche\.aol\.de',
@@ -406,6 +412,8 @@
'www\.metager\.de',
'search\.1und1\.de',
'sm\.de',
+'sumaja\.de',
+'navigationshilfe\.t-online\.de',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu',
@@ -570,6 +578,10 @@
'search\.searchcompletion\.com', 'searchcompletion',
'en\.eazel\.com','eazelsearch',
'sr\.searchfunmoods\.com', 'searchfunmoods',
+'173\.194\.35\.177', 'googleByIP',
+'dalesearch\.com', 'dalesearch',
+'sweetpacks-search\.com', 'sweetpacks',
+'searchgol\.com', 'searchgol',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
@@ -661,6 +673,7 @@
'searchy\.co\.uk','searchy',
'search\.fbdownloader\.com','fbdownloader',
'search\.babylon\.com', 'babylon',
+'my\.allgameshome\.com', 'allgameshome',
# Minor finnish search engines
'haku\.www\.fi','haku',
# Minor french search engines
@@ -691,6 +704,7 @@
'netluchs\.de','netluchs',
'schoenerbrausen\.de','schoenerbrausen',
'suche\.gmx\.net', 'gmxsuche',
+'suche\.gmx\.at', 'gmxsuche_at',
'ecosia\.org', 'ecosiasearch',
'de\.aolsearch\.com', 'aolsearch',
'suche\.aol\.de', 'aolsuche',
@@ -702,6 +716,8 @@
'www\.metager\.de', 'metager',
'search\.1und1\.de', 'search_1und1_de',
'sm\.de', 'smde',
+'sumaja\.de', 'sumaja',
+'navigationshilfe\.t-online\.de', 'navigationshilfe',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
@@ -879,6 +895,10 @@
'searchcompletion', 'q=',
'eazelsearch', 'q=',
'searchfunmoods', 'q=',
+'googleByIP', 'q=',
+'dalesearch', 'q=',
+'sweetpacks', 'q=',
+'searchgol', 'q=',
# Chello Portals
'chelloat','q1=',
'chellobe','q1=',
@@ -945,6 +965,7 @@
'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
'fbdownloader','q=',
'babylon','q=',
+'allgameshome', 's=',
# Minor finnish search engines
'haku','w=',
# Minor french search engines
@@ -963,6 +984,7 @@
'netluchs','query=',
'schoenerbrausen','q=',
'gmxsuche', 'q=',
+'gmxsuche_at', 'q=',
'ecosiasearch', 'q=',
'aolsearch', 'q=',
'aolsuche', 'q=',
@@ -974,6 +996,8 @@
'metager', 'eingabe=',
'search_1und1_de', 'q=',
'smde', 'q=',
+#'sumaja', 'no query string available', # Intentionally disabled: the referrer URL of this engine contains no query string, so no keyword parameter can be given
+'navigationshilfe', 'q=',
# Minor Hungarian search engines
'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
'keresolap_hu','q=',
@@ -1149,6 +1173,10 @@
'searchcompletion', 'SearchCompletion Search',
'eazelsearch', 'Eazel Search',
'searchfunmoods', 'Funmoods',
+'googleByIP', 'Google (Access by IP-Address)',
+'dalesearch', 'Dale Search',
+'sweetpacks', 'Sweetpacks',
+'searchgol', 'Search-Gol',
# Chello Portals
'chelloat','Chello Austria',
'chellobe','Chello Belgium',
@@ -1216,6 +1244,7 @@
'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk',
'fbdownloader','FBDownloader',
'babylon','Babylon',
+'allgameshome', 'AllGamesHome',
# Minor finnish search engines
'haku','Ihmemaa',
# Minor french search engines
@@ -1235,6 +1264,7 @@
'netluchs','Netluchs',
'schoenerbrausen','Schoenerbrausen/',
'gmxsuche', 'GMX Suche',
+'gmxsuche_at', 'GMX Suche Österreich',
'ecosiasearch', 'Ecosia Search',
'aolsearch', 'AOL Search',
'aolsuche', 'AOL Suche',
@@ -1246,6 +1276,8 @@
'metager', 'MetaGer',
'search_1und1_de', '1&1 Suche',
'smde', 'SM.de - Die SuchMaschine',
+'sumaja', 'Sumaja',
+'navigationshilfe', 'T-Online Navigationshilfe',
# Minor hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
'tango_hu','Tango',