From: eldy <>
Date: Sat, 24 Aug 2013 19:11:32 +0000 (+0000)
Subject: Update robot, search engines database.
X-Git-Tag: AWSTATS_7_3~54
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bb39440fbfdbbfa5919968d0fca75db7443c969a;p=thirdparty%2FAWStats.git
Update robot, search engines database.
---
diff --git a/docs/awstats_changelog.txt b/docs/awstats_changelog.txt
index 14306c5e..a77fe863 100644
--- a/docs/awstats_changelog.txt
+++ b/docs/awstats_changelog.txt
@@ -8,6 +8,7 @@ $Revision$ - $Author$ - $Date$
New features:
- #877 Windows 8 + iOS Support in AWStats
- Add command line option -version
+- Update robots and search engines databases.
***** 7.2 *****
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm
index a58361c4..6581b622 100644
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -39,10 +39,10 @@
# updated YahooSeeker description (blog crawler)
# 2005-09-16 added link for http://linkchecker.sourceforge.net
# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
-# added Blogslive info@blogslive.com intelliseek.com
+# added Blogslive info@blogslive.com intelliseek.com
# added BlogPulse (ISSpider-3.0) intelliseek.com
# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
-# added EverbeeCrawler
+# added EverbeeCrawler
# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
# added link for Bloglines http://www.bloglines.com
# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
@@ -91,11 +91,11 @@
# added EARTHCOM.info www.earthcom.info
# added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
# added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
-# 2006-01-01
+# 2006-01-01
# added Dulance http://www.dulance.com/bot.jsp
# added MojeekBot http://www.mojeek.com/bot.html
# added nicebot http://www.egghelp.org/setup.htm ?
-# added Snappy http://www.urltrends.com/faq.php
+# added Snappy http://www.urltrends.com/faq.php
# added sohu agent
# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
# added zspider http://feedback.redkolibri.com/
@@ -111,63 +111,63 @@
# added LetsCrawl.com http://letscrawl.com
# added ichiro http://help.goo.ne.jp/door/crawlerE.html
# 2006-01-27 additional 22 robots from a list provided by Moizes Gabor
-# added ALeadSoftbot http://www.aleadsoft.com/bot.htm
-# added CipinetBot http://www.cipinet.com/bot.html
-# added Cuasarbot http://www.cuasar.com/
-# added Dumbot http://www.dumbfind.com/
-# added Extreme_Picture_Finder http://www.exisoftware.com/
-# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots
+# added ALeadSoftbot http://www.aleadsoft.com/bot.htm
+# added CipinetBot http://www.cipinet.com/bot.html
+# added Cuasarbot http://www.cuasar.com/
+# added Dumbot http://www.dumbfind.com/
+# added Extreme_Picture_Finder http://www.exisoftware.com/
+# added Fooky.com/ScorpionBot/ScoutOut http://www.fooky.com/scorpionbots
# added IlTrovatore-Setaccio http://www.iltrovatore.it/aiuto/motore_di_ricerca.html bot@iltrovatore.it
-# added InsurancoBot http://www.fastspywareremoval.com/
+# added InsurancoBot http://www.fastspywareremoval.com/
# added InternetArchive http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
-# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca
+# added KazoomBot http://www.kazoom.ca/bot.html kazoombot@kazoom.ca
# added Kurzor http://www.easymail.hu/ cursor@easymail.hu
# added NutchCVS http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
# added NutchOSU-VLIB http://lucene.apache.org/nutch/bot.html nutch-agent@lucene.apache.org
-# added Orbiter http://www.dailyorbit.com/bot.htm
-# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php
-# added SuperBot http://www.sparkleware.com/superbot/
+# added Orbiter http://www.dailyorbit.com/bot.htm
+# added PHP_version_tracker http://www.nexen.net/phpversion/bot.php
+# added SuperBot http://www.sparkleware.com/superbot/
# added SynooBot http://www.synoo.de/bot.html webmaster@synoo.com
-# added TestBot http://www.agbrain.com/
-# added TutorGigBot http://www.tutorgig.info/
-# added WebIndexer mailto://webindexerv1@yahoo.com
+# added TestBot http://www.agbrain.com/
+# added TutorGigBot http://www.tutorgig.info/
+# added WebIndexer mailto://webindexerv1@yahoo.com
# added WebMiner http://64.124.122.252/feedback.html
-# 2006-02-01
+# 2006-02-01
# added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
# added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
# additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
# added Candlelight_Favorites_Inspector
-# added DomainChecker
-# added EasyDL
-# added FavOrg
+# added DomainChecker
+# added EasyDL
+# added FavOrg
# added Favorites_Sweeper
# added Html_Link_Validator
-# added Internet_Ninja
+# added Internet_Ninja
# added JRTwine_Software_Check_Favorites_Utility
# fixed Microsoft_URL_Control
-# added miniRank
+# added miniRank
# added Missigua_Locator
-# added NPBot
-# added Ocelli
-# added Onet.pl_SA
-# added proodleBot
-# added SearchGuild_DMOZ_Experiment
-# added Susie
+# added NPBot
+# added Ocelli
+# added Onet.pl_SA
+# added proodleBot
+# added SearchGuild_DMOZ_Experiment
+# added Susie
# added Website_Monitoring_Bot
# added Xenu_Link_Sleuth
# 2006-05-15
# added ASPseek http://www.aspseek.org/
-# added AdamM Bot http://home.blic.net/adamm/
+# added AdamM Bot http://home.blic.net/adamm/
# added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
# added arianna.libero.it (Italian Portal/search engine)
# added Biz360 spider http://www.biz360.com
# added BlogBridge Service http://www.blogbridge.com/
-# added BlogSearch http://www.icerocket.com/
+# added BlogSearch http://www.icerocket.com/
# added libcrawl
# added edgeio-relanshanbottriever http://www.edgeio.com
# added FeedFlow http://feedflow.com/about
# added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
-# added Java catchall - used by many spam bots
+# added Java catchall - used by many spam bots
# added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
# added msnbot-media http://search.msn.com/msnbot.htm
# added MT::Telegraph::Agent
@@ -197,7 +197,7 @@
# added ActiveBookmark http://www.libmaster.com/active_bookmark.php
# added BIGLOTRON http://www.biglotron.com/robot.html
# added Bookmark-Manager http://bkm.sourceforge.net/
-# added cbn00glebot
+# added cbn00glebot
# added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
# added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
# added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
@@ -205,9 +205,9 @@
# added ConveraCrawler http://www.authoritativeweb.com/crawl/
# added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
# added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
-# added Cursor http://adcenter.hu/docs/en/bot.html
+# added Cursor http://adcenter.hu/docs/en/bot.html
# added Custo http://www.netwu.com/custo/
-# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
+# added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
# added Deepindex http://www.deepindex.net/faq.php
# added DNSGroup http://www.dnsgroup.com/
# added DoCoMo http://www.nttdocomo.co.jp/
@@ -219,13 +219,13 @@
# added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
# added FeedValidator http://feedvalidator.org/
# added FilmkameraBot http://www.filmkamera.at/bot.html
-# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
+# added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
# added Global Fetch http://www.wesonet.com/
# added GOFORITBOT http://www.goforit.com/about/
# added GoForIt.com http://www.goforit.com/about/
# added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
# added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
-# added HPPrint
+# added HPPrint
# added HTMLParser http://htmlparser.sourceforge.net/
# added Hundesuche.com-Bot http://www.hundesuche.com/
# added InfoBot http://www.infobot.org/
@@ -247,10 +247,10 @@
# added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
# added Megite http://www.megite.com/
# added Metaspinner http://index.meta-spinner.de/
-# added Mini-reptile
+# added Mini-reptile
# added Misterbot http://www.misterbot.fr/
# added Miva http://www.miva.com/
-# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
+# added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
# added MSRBOT http://research.microsoft.com/research/sv/msrbot/
# added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
# added Mydoyouhike http://www.doyouhike.net/my
@@ -267,14 +267,14 @@
# added PictureOfInternet http://malfunction.org/poi/
# added plinki http://www.plinki.com/
# added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
-# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
-# added ProjectWF-java-test-crawler
+# added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
+# added ProjectWF-java-test-crawler
# added PyQuery http://sourceforge.net/projects/pyquery/
-# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
+# added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
# added Scumbot
# added Sensis Web Crawler http://www.sensis.com.au/
# added snap.com beta crawler http://www.snap.com/
-# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
+# added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
# added STEROID Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
# added Suchfin-Bot http://www.suchfin.de/
# added Sunrise http://www.sunrisexp.com/
@@ -287,15 +287,15 @@
# added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
# added updated http://www.updated.com/
# added Vermut http://vermut.aol.com
-# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
+# added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
# added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
# added VSE http://www.vivisimo.com/
# added webcrawl.net http://www.webcrawl.net/
# added Web Downloader http://www.krasu.ru/soft/chuchelo/
# added Webdup http://www.webdup.com/en/index.html
-# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
+# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
# added WordPress http://wordpress.org/
-# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
+# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
# added Xenu's Link Sleuth (with ')
# added xirq http://www.xirq.com/
# added yoogliFetchAgent http://www.yoogli.com/
@@ -306,6 +306,8 @@
# changed favorites_sweeper -> favorites_sweeper
# -- updates
# updated AskJeeves to Ask
+# 2012-06-05 Albrecht Mueller
+# added Grabber from SDSC (San Diego Supercomputer Center).
# to do MS Search 4.0 Robot
@@ -313,7 +315,7 @@
# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
-# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
+# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
# Rem: To avoid bad detection, some robot's ids were removed from this list:
# - Robots with ID of 3 letters only
# - Robots called 'webs' and 'tcl'
@@ -341,6 +343,7 @@
'ferret',
'googlebot',
'google\-sitemaps',
+'grabber',
'google[_+ ]web[_+ ]preview',
'gulliver',
'virus[_+ ]detector', # Must be before harvest
@@ -378,13 +381,14 @@
'webbase',
'webcollage',
'cfetch',
-'zyborg', # Must be before wisenut
+'zyborg', # Must be before wisenut
'wisenutbot'
);
@RobotsSearchIDOrder_list2 = (
# Less common robots (In robot file)
'[^a]fish',
'abcdatos',
+'abonti\.com',
'acme\.spider',
'ahoythehomepagefinder',
'alkaline',
@@ -475,6 +479,7 @@
'iaskspider',
'hl_ftien_spider',
'sogou',
+'icjobs\.de',
'iconoclast',
'ilse',
'imagelock',
@@ -777,6 +782,7 @@
'internetarchive',
'internetseer',
'internetsupervision',
+'ips\-agent',
'irlbot',
'isearch2006',
'iupui_research_bot',
@@ -799,6 +805,7 @@
'letscrawl\.com',
'libcrawl',
'linkbot',
+'linkdex\.com',
'link_valet_online',
'metager\-linkchecker', # Must be before linkchecker
'linkchecker',
@@ -883,6 +890,7 @@
'seznambot',
'shim\-crawler',
'shoutcast',
+'siteexplorer\.info',
'slysearch',
'snap\.com_beta_crawler',
'sohu\-search',
@@ -1009,7 +1017,7 @@
'^motorola$',
'movabletype',
# These appear to be bots trying to hide. All of the usual architecture data is missing.
-'^mozilla\/3\.0 \(compatible$',
+'^mozilla\/3\.0 \(compatible$',
'^mozilla\/4\.0$',
'^mozilla\/4\.0 \(compatible;\)$',
'^mozilla\/5\.0$',
@@ -1113,6 +1121,7 @@
'contentmatch','Yahoo!China ContentMatch Crawler',
'googlebot','Googlebot',
'google\-sitemaps', 'Google Sitemaps',
+'grabber', 'Grabber (SDSC)',
'google[_+ ]web[_+ ]preview', 'Google Web Preview',
'gulliver','Northern Light Gulliver',
'virus[_+ ]detector','virus_detector',
@@ -1154,6 +1163,7 @@
# Less common robots (In robot file)
'[^a]fish','Fish search',
'abcdatos','ABCdatos BotLink',
+'abonti\.com','Abonti WebSearch',
'acme\.spider','Acme.Spider',
'ahoythehomepagefinder','Ahoy! The Homepage Finder',
'alkaline','Alkaline',
@@ -1218,7 +1228,7 @@
'felix','Felix IDE',
'fetchrover','FetchRover',
'fido','fido',
-'finnish','H���ki',
+'finnish','Finnish',
'fireball','KIT-Fireball',
'fouineur','Fouineur',
'francoroute','Robot Francoroute',
@@ -1244,6 +1254,11 @@
'iaskspider','Sina Iask Spider',
'hl_ftien_spider','Hylanda',
'sogou','Sogou Spider',
+'icjobs\.de', 'iCjobs Spider (only hits on robots.txt counted!)',
+#The user agent string of the icjobs-spider seems to contain the
+#identifying string only when it accesses the robots.txt file.
+#When it accesses the actual content it does not identify itself as
+#a spider. Thus traffic of this spider is counted as user traffic.
'iconoclast','Popular Iconoclast',
'ilse','Ingrid',
'imagelock','Imagelock',
@@ -1254,6 +1269,7 @@
'infospider','InfoSpiders',
'inspectorwww','Inspector Web',
'intelliagent','IntelliAgent',
+'ips\-agent', 'ips-agent Verisign(?) - no reliable information found.',
'irobot','I, Robot',
'iron33','Iron33',
'israelisearch','Israeli-search',
@@ -1431,7 +1447,7 @@
'activebookmark','ActiveBookmark',
'adamm_bot','AdamM Bot',
'almaden','IBM Almaden Research Center WebFountain™',
-'aipbot','aipbot',
+'aipbot','aipbot',
'aleadsoftbot','ALeadSoftbot',
'alpha_search_agent','Alpha Search Agent',
'allrati','Allrati',
@@ -1468,7 +1484,7 @@
'cfnetwork','CFNetwork',
'cipinetbot','CipinetBot',
'checkweb_link_validator','CheckWeb link validator',
-'commons\-httpclient','Jakarta commons-httpclient',
+'commons\-httpclient','Jakarta commons-httpclient',
'computer_and_automation_research_institute_crawler','Computer and Automation Research Institute Crawler',
'converamultimediacrawler','ConveraMultiMediaCrawler',
'converacrawler','ConveraCrawler',
@@ -1571,6 +1587,7 @@
'libcrawl','Crawl libcrawl',
'link_valet_online','Link Valet Online',
'linkbot','LinkBot',
+'linkdex\.com', 'Linkdex',
'linkchecker','LinkChecker',
'livejournal\.com', 'LiveJournal.com',
'magpierss', 'MagpieRSS',
@@ -1652,10 +1669,11 @@
'seznambot','SeznamBot',
'shim\-crawler','Shim-Crawler',
'shoutcast','Shoutcast Directory Service',
+'siteexplorer\.info', 'Site Explorer',
'slysearch','SlySearch',
'snap\.com_beta_crawler','snap.com beta crawler',
'sohu\-search','sohu-search',
-'sohu','sohu agent',
+'sohu','sohu agent',
'snappy','Snappy',
'sphere_scout','Sphere Scout',
'spip','SPIP',
@@ -1686,7 +1704,7 @@
'unchaos_bot_hybrid_web_search_engine','UnChaos Bot Hybrid Web Search Engine',
'unido\-bot','unido-bot',
'updated','updated',
-'ustc\-semantic\-group','USTC-Semantic-Group',
+'ustc\-semantic\-group','USTC-Semantic-Group',
'vagabondo\-wap','Vagabondo-WAP',
'vagabondo','Vagabondo',
'vermut','Vermut',
@@ -1753,98 +1771,98 @@
'bot[\s_+:,\.\;\/\\\-]','Unknown robot (identified by \'bot*\')',
'[\s_+:,\.\;\/\\\-]bot','Unknown robot (identified by \'*bot\')',
'curl', 'Common *nix tool for automating web document retireval. Most likely a bot.',
-'php', 'A PHP script',
-'ruby\/', 'Ruby script',
+'php', 'A PHP script',
+'ruby\/', 'Ruby script',
# Additional bots found by Sussex.
'^[1-3]$', 'Generic bot identified as "1", "2" or "3"',
'alltop', 'alltop',
-'applesyndication', 'applesyndication',
-'asynchttpclient', 'asynchttpclient',
-'bingbot', 'bingbot',
-'blogged_crawl', 'blogged_crawl',
+'applesyndication', 'applesyndication',
+'asynchttpclient', 'asynchttpclient',
+'bingbot', 'bingbot',
+'blogged_crawl', 'blogged_crawl',
'bloglovin', 'bloglovin',
'butterfly', 'butterfly',
-'buzztracker', 'buzztracker',
+'buzztracker', 'buzztracker',
'carpathia', 'carpathia',
-'catbot', 'catbot',
-'chattertrap', 'chattertrap',
-'check_http', 'check_http (nagios)',
-'coldfusion', 'coldfusion',
+'catbot', 'catbot',
+'chattertrap', 'chattertrap',
+'check_http', 'check_http (nagios)',
+'coldfusion', 'coldfusion',
'covario', 'covario',
-'daylifefeedfetcher', 'daylifefeedfetcher',
-'discobot', 'discobot',
+'daylifefeedfetcher', 'daylifefeedfetcher',
+'discobot', 'discobot',
'dlvr\.it', 'dlvr.it',
'dreamwidth', 'dreamwidth',
-'drupal', 'Drupal Site',
+'drupal', 'Drupal Site',
'ezoom', 'ezoom',
-'feedmyinbox', 'feedmyinbox',
+'feedmyinbox', 'feedmyinbox',
'feedroll\.com', 'feedroll.com',
'feedzira', 'feedzira',
-'fever\/', 'Feed a Fever',
-'freenews', 'freenews',
+'fever\/', 'Feed a Fever',
+'freenews', 'freenews',
'geohasher', 'geohasher',
-'hanrss', 'hanrss',
-'inagist', 'inagist',
-'jacobin club', 'jacobin club',
+'hanrss', 'hanrss',
+'inagist', 'inagist',
+'jacobin club', 'jacobin club',
'jakarta', 'jakarta',
-'js\-kit', 'js-kit',
-'largesmall crawler', 'largesmall crawler',
-'linkedinbot', 'linkedinbot',
-'longurl', 'longurl',
-'metauri', 'metauri',
-'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
+'js\-kit', 'js-kit',
+'largesmall crawler', 'largesmall crawler',
+'linkedinbot', 'linkedinbot',
+'longurl', 'longurl',
+'metauri', 'metauri',
+'microsoft\-webdav\-miniredir', 'microsoft-webdav-miniredir',
'^motorola$', 'Suspected Bot masquerading as "Motorola"',
-'movabletype', 'movabletype',
-'^mozilla\/3\.0 \(compatible$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/4\.0 \(compatible;\)$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 \(compatible;$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 \(en\-us\)$', 'Suspected bot masqurading as Mozilla',
-'^mozilla\/5\.0 firefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
-'^msie', 'Suspected bot masquerading as M$ IE',
+'movabletype', 'movabletype',
+'^mozilla\/3\.0 \(compatible$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/4\.0 \(compatible;\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0 \(compatible;$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0 \(en\-us\)$', 'Suspected bot masqurading as Mozilla',
+'^mozilla\/5\.0 firefox\/3\.0\.5$', 'Suspected bot masqurading as Mozilla',
+'^msie', 'Suspected bot masquerading as M$ IE',
'netnewswire', 'netnewswire',
' netseer ', 'Net Seer',
-'netvibes', 'netvibes',
+'netvibes', 'netvibes',
'newrelicpinger', 'newrelicpinger',
'newsfox', 'Fox News',
-'nextgensearchbot', 'nextgensearchbot',
-'ning', 'ning',
-'pingdom', 'pingdom',
-'pita', 'pita (pain in the ass?)',
-'postpost', 'postpost',
+'nextgensearchbot', 'nextgensearchbot',
+'ning', 'ning',
+'pingdom', 'pingdom',
+'pita', 'pita (pain in the ass?)',
+'postpost', 'postpost',
'postrank', 'postrank',
-'printfulbot', 'printfulbot',
-'protopage', 'protopage',
-'proximic', 'proximic',
-'quipply', 'quipply',
+'printfulbot', 'printfulbot',
+'protopage', 'protopage',
+'proximic', 'proximic',
+'quipply', 'quipply',
'r6\_', 'Radian 6 Crawler',
-'ratingburner', 'ratingburner',
-'regator', 'regator',
-'rome client', 'rome client',
-'rpt\-httpclient', 'rpt-httpclient',
-'rssgraffiti', 'rssgraffiti',
+'ratingburner', 'ratingburner',
+'regator', 'regator',
+'rome client', 'rome client',
+'rpt\-httpclient', 'rpt-httpclient',
+'rssgraffiti', 'rssgraffiti',
'sage\+\+', 'sage++',
'scoutjet', 'ScoutJet crawler for Blekko.',
-'simplepie', 'simplepie',
-'sitebot', 'sitebot',
-'summify\.com', 'summify.com',
-'superfeedr', 'superfeedr',
-'synthesio', 'synthesio',
+'simplepie', 'simplepie',
+'sitebot', 'sitebot',
+'summify\.com', 'summify.com',
+'superfeedr', 'superfeedr',
+'synthesio', 'synthesio',
'teoma', 'teoma',
-'topblogsinfo', 'topblogsinfo',
-'topix\.net', 'topix.net',
-'trapit', 'trapit',
-'trileet', 'trileet',
+'topblogsinfo', 'topblogsinfo',
+'topix\.net', 'topix.net',
+'trapit', 'trapit',
+'trileet', 'trileet',
'tweetedtimes', 'The Tweeted Times',
-'twisted pagegetter', 'twisted pagegetter',
-'twitterbot', 'twitterbot',
-'twitterfeed', 'twitterfeed',
-'unwindfetchor', 'unwindfetchor',
-'wazzup', 'wazzup',
+'twisted pagegetter', 'twisted pagegetter',
+'twitterbot', 'twitterbot',
+'twitterfeed', 'twitterfeed',
+'unwindfetchor', 'unwindfetchor',
+'wazzup', 'wazzup',
'windows\-rss\-platform', 'windows-rss-platform',
'wiumi', 'wiumi',
-'xydo', 'xydo',
+'xydo', 'xydo',
'yahoo! slurp', 'Additional Yahoo bots.',
'yahoo pipes', 'Additional Yahoo bots.',
'yahoo\-newscrawler', 'Additional Yahoo bots.',
@@ -1852,13 +1870,13 @@
'yahooexternalcache', 'Additional Yahoo bots.',
'yahoo! searchmonkey', 'Additional Yahoo bots.',
'yahooysmcm', 'Additional Yahoo bots.',
-'yammer', 'yammer',
-'yandexbot', 'yandexbot',
-'yeti', 'yeti',
+'yammer', 'yammer',
+'yandexbot', 'yandexbot',
+'yeti', 'yeti',
'yie8', 'yie8',
-'youdao', 'youdao',
-'yourls', 'yourls',
-'zemanta', 'zemanta',
+'youdao', 'youdao',
+'yourls', 'yourls',
+'zemanta', 'zemanta',
'zend_http_client', 'Zend Http Client',
'no_user_agent','Unknown robot (identified by empty user agent string)',
# Unknown robots identified by hit on robots.txt
@@ -1903,4 +1921,4 @@
'yahooysmcm'=>'Yahoo'
);
-1;
+1;
\ No newline at end of file
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm
index 52907ab9..9190ac0d 100644
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -303,6 +303,14 @@
'start\.iminent\.com',
'www\.searchmobileonline\.com',
'int\.search-results\.com',
+'www2\.inbox\.com',
+'www\.govome\.com',
+'find1friend\.com',
+'start\.mysearchdial\.com',
+'go\.speedbit\.com',
+'search\.certified-toolbar\.com',
+'search\.sweetim\.com',
+'search\.searchcompletion\.com',
# Chello Portals
'chello\.at',
'chello\.be',
@@ -390,6 +398,11 @@
'suche\.aol\.de',
'www\.startxxl\.com',
'www\.benefind\.de',
+'www\.amazon\.de.*search', # Reminder: this probably will not work, as AWStats seems to consider only the host part of a URL
+'de\.wow\.com',
+'www\.vlips\.de',
+'www\.metager\.de',
+'search\.1und1\.de',
# Minor Hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
'tango\.hu',
@@ -541,6 +554,14 @@
'start\.iminent\.com', 'iminent',
'www\.searchmobileonline\.com', 'searchmobileonline',
'int\.search-results\.com', 'nortonsavesearch',
+'www2\.inbox\.com', 'inbox',
+'www\.govome\.com', 'govome',
+'find1friend\.com', 'find1friend',
+'start\.mysearchdial\.com', 'mysearchdial',
+'go\.speedbit\.com', 'speedbit',
+'search\.certified-toolbar\.com', 'certifiedtoolbarsearch',
+'search\.sweetim\.com', 'sweetim',
+'search\.searchcompletion\.com', 'searchcompletion',
# Chello Portals
'chello\.at','chelloat',
'chello\.be','chellobe',
@@ -667,6 +688,11 @@
'suche\.aol\.de', 'aolsuche',
'www\.startxxl\.com', 'startxxl',
'www\.benefind\.de', 'benefind',
+'www\.amazon\.de.*search', 'amazonsearch', # Not clear whether this matches only Amazon searches
+'de\.wow\.com', 'wowsearch',
+'www\.vlips\.de', 'vlips_de',
+'www\.metager\.de', 'metager',
+'search\.1und1\.de', 'search_1und1_de',
# Minor Hungarian search engines
'heureka\.hu','heureka',
'vizsla\.origo\.hu','origo',
@@ -832,6 +858,14 @@
'iminent', 'q=',
'searchmobileonline', 'q=',
'nortonsavesearch', 'q=',
+'inbox', 'q(?:kw)?=',
+'govome', 'q=',
+'find1friend', 'q=',
+'mysearchdial', 'q=',
+'speedbit', 'q=',
+'certifiedtoolbarsearch', 'q=',
+'sweetim', 'q=',
+'searchcompletion', 'q=',
# Chello Portals
'chelloat','q1=',
'chellobe','q1=',
@@ -921,6 +955,11 @@
'aolsuche', 'q=',
'startxxl', 'q=',
'benefind', 'q=',
+'amazonsearch', 'query=',
+'wowsearch', 'q=',
+'vlips_de', 'q=',
+'metager', 'eingabe=',
+'search_1und1_de', 'q=',
# Minor Hungarian search engines
'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=',
'keresolap_hu','q=',
@@ -1084,6 +1123,14 @@
'iminent', 'Iminent',
'searchmobileonline', 'Search Mobile Online (StartApp)',
'nortonsavesearch', 'Norton Safe Search',
+'inbox', 'Inbox Search',
+'govome', 'Govome',
+'find1friend', 'Find1Friend',
+'mysearchdial', 'My Search Dial',
+'speedbit', 'Speedbit',
+'certifiedtoolbarsearch', 'Certified-Toolbar Search',
+'sweetim', 'SweetIM Search',
+'searchcompletion', 'SearchCompletion Search',
# Chello Portals
'chelloat','Chello Austria',
'chellobe','Chello Belgium',
@@ -1175,6 +1222,11 @@
'aolsuche', 'AOL Suche',
'startxxl', 'StartXXL',
'benefind', 'benefind',
+'amazonsearch', 'Amazon Web Search',
+'wowsearch', 'Wow Search',
+'vlips_de', 'vlips.de',
+'metager', 'MetaGer',
+'search_1und1_de', '1&1 Suche',
# Minor hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkereso', 'goliat','Goliat', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Kereso',
'tango_hu','Tango',
@@ -1264,4 +1316,4 @@
#}
#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
-1;
+1;
\ No newline at end of file