From: eldy <>
Date: Wed, 23 Nov 2005 20:50:30 +0000 (+0000)
Subject: Detect linux and bsd distributions and groups them by families
X-Git-Tag: AWSTATS_6_5_BETA_2~10
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=962a816c27a166cd877cf3bee44b423b73df0961;p=thirdparty%2FAWStats.git
Detect linux and bsd distributions and groups them by families
---
diff --git a/wwwroot/cgi-bin/lib/operating_systems.pm b/wwwroot/cgi-bin/lib/operating_systems.pm
index f9e597a0..2cfb1f9f 100644
--- a/wwwroot/cgi-bin/lib/operating_systems.pm
+++ b/wwwroot/cgi-bin/lib/operating_systems.pm
@@ -5,6 +5,12 @@
#-------------------------------------------------------
# $Revision$ - $Author$ - $Date$
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html:
+# - added specific Linux distributions in addition to
+# the generic Linux. Requires modifications to awstats.pl
+# if you want to see Linux as a Group in the main report.
+# Included documentation link to Distribution home pages.
+# - added links for each operating system.
#package AWSOS;
@@ -33,19 +39,30 @@
'mac[_+ ]68', # This works for mac_6800 and mac_68k
'macweb',
'macintosh',
-# Unix like OS
+# Linux family
+'linux(.*)centos',
+'linux(.*)debian',
+'linux(.*)fedora',
+'linux(.*)mandr',
+'linux(.*)red[_+ ]hat',
+'linux(.*)suse',
+'linux(.*)ubuntu',
'linux',
+# Hurd family
+'gnu.hurd',
+# BSDs family
+'bsdi',
+'freebsd',
+'openbsd',
+'netbsd',
+# Other Unix, Unix-like
'aix',
'sunos',
'irix',
'osf',
-'hp-ux',
-'netbsd',
-'bsdi',
-'freebsd',
-'openbsd',
-'gnu.hurd',
-'unix','x11',
+'hp\-ux',
+'unix',
+'x11',
# Other famous OS
'beos',
'os/2',
@@ -81,19 +98,30 @@
# Macintosh OS family
'mac[_+ ]os[_+ ]x','macosx',
'mac[_+ ]?p','macintosh','mac[_+ ]68','macintosh','macweb','macintosh','macintosh','macintosh',
-# Unix like OS
+# Linux family (linuxyyy)
+'linux(.*)centos','linuxcentos',
+'linux(.*)debian','linuxdebian',
+'linux(.*)fedora','linuxfedora',
+'linux(.*)mandr','linuxmandr',
+'linux(.*)red[_+ ]hat','linuxredhat',
+'linux(.*)suse','linuxsuse',
+'linux(.*)ubuntu','linuxubuntu',
'linux','linux',
+# Hurd family
+'gnu.hurd','gnu',
+# BSDs family (bsdyyy)
+'netbsd','bsdnetbsd',
+'bsdi','bsdi',
+'freebsd','bsdfreebsd',
+'openbsd','bsdopenbsd',
+# Other Unix, Unix-like
'aix','aix',
'sunos','sunos',
'irix','irix',
'osf','osf',
-'hp-ux','hp-ux',
-'netbsd','netbsd',
-'bsdi','bsdi',
-'freebsd','freebsd',
-'openbsd','openbsd',
-'gnu.hurd','gnu',
-'unix','unix','x11','unix',
+'hp\-ux','hp\-ux',
+'unix','unix',
+'x11','unix',
# Other famous OS
'beos','beos',
'os/2','os/2',
@@ -127,18 +155,31 @@
# Macintosh OS
'macosx','Mac OS X',
'macintosh','Mac OS',
-# Unix like OS
-'linux','GNU Linux',
+# Linux
+'linuxcentos','Centos',
+'linuxdebian','Debian',
+'linuxfedora','Fedora',
+'linuxmandr','Mandriva (or Mandrake)',
+'linuxredhat','Red Hat',
+'linuxsuse','Suse',
+'linuxubuntu','Ubuntu',
+'linux','GNU Linux (Unknown or unspecified distribution)',
+# Hurd
+'gnu','GNU Hurd',
+# BSDs
+'netbsd','NetBSD', # For backward compatibility
+'bsdnetbsd','NetBSD',
+'bsdi','BSDi',
+'freebsd','FreeBSD', # For backward compatibility
+'bsdfreebsd','FreeBSD',
+'openbsd','OpenBSD', # For backward compatibility
+'bsdopenbsd','OpenBSD',
+# Other Unix, Unix-like
'aix','Aix',
'sunos','Sun Solaris',
'irix','Irix',
'osf','OSF Unix',
-'hp-ux','HP Unix',
-'netbsd','NetBSD',
-'bsdi','BSDi',
-'freebsd','FreeBSD',
-'openbsd','OpenBSD',
-'gnu','GNU Hurd',
+'hp\-ux','HP Unix',
'unix','Unknown Unix system',
# Other famous OS
'beos','BeOS',
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm
index 09c36e9d..2feebac1 100644
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -5,16 +5,59 @@
#-------------------------------------------------------
# $Revision$ - $Author$ - $Date$
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html:
+# added dipsie (not tested with real data).
+# added DomainsDB.net http://domainsdb.net/
+# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
+# added Nutch (used by looksmart (furl?))
+# added rssImagesBot
+# added Sqworm
+# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
+# added w3c css-validator
+# added documentation link to bot home pages for above and selected major bots.
+# In the case of international bots, choose .com page.
+# Included tool tip (html "title").
+# To do: parameterize to match both AWStats language and tooltips settings.
+# To do: add html links for all bots based on current documentation in source
+# files referenced below.
+# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
+# made minor grammar corrections to notes below
+# 2005-08-24 added YahooSeeker-Testing
+# added w3c-checklink
+# updated url for ask.com
+# 2005-08-24 added Girafabot http://www.girafa.com/
+# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/
+# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
+# added geniebot (wgao@genieknows.com)
+# added BecomeBot link http://www.become.com/site_owners.html
+# added topicblogs http://www.topicblogs.com/
+# added Powermarks; seen used by referrer spam
+# added YahooSeeker
+# added NG/2. http://www.exabot.com/
+# 2005-09-15 added link for Walhello appie
+# added bender focused_crawler
+# updated YahooSeeker description (blog crawler)
+# 2005-09-16 added link for http://linkchecker.sourceforge.net
+# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
+# added Blogslive info@blogslive.com intelliseek.com
+# added BlogPulse (ISSpider-3.0) intelliseek.com
+# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+# added EverbeeCrawler
+# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
+# added link for Bloglines http://www.bloglines.com
+# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
+# added Blogshares Spiders (Synchronized V1.5.1)
+# added yacy
#package AWSROB;
# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
-# Rem: To avoid bad detection, some robots id were removed from this list:
+# Rem: To avoid bad detection, some robot's ids were removed from this list:
# - Robots with ID of 3 letters only
-# - Robot called 'webs' and 'tcl'
-# Rem: Some robot most used for download are also removed: wget
+# - Robots called 'webs' and 'tcl'
+# Rem: Some robots mostly used for downloading have also been removed, i.e. wget
# Rem: directhit changed into direct_hit (its real id)
# Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser
# Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser
@@ -25,7 +68,7 @@
# RobotsSearchIDOrder
# It contains all matching criteria to search for in log fields. This list is
# used to know in which order to search Robot IDs.
-# Most frequent one are in list1, used when LevelForRobotsDetection is 1 or more
+# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
# Note: Robots IDs are in lower case, ' ' and '+' are changed into '_' and are quoted.
#-------------------------------------------------------
@@ -55,6 +98,7 @@
'digout4u',
'echo',
'fast\-webcrawler',
+'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
'ia_archiver',
'jennybot',
'mercator',
@@ -79,6 +123,7 @@
'araneo',
'aretha',
'ariadne',
+'powermarks',
'arks',
'aspider',
'atn\.txt',
@@ -269,6 +314,7 @@
'spiderman',
'spiderview',
'spry',
+'sqworm',
'ssearcher',
'suke',
'suntek',
@@ -330,18 +376,30 @@
'awbot',
'baiduspider',
'becomebot',
+'bender',
'bloglines',
+'blogpulse',
+'blogshares',
+'blogslive',
'bobby',
'boris',
'bumblebee',
+'converacrawler',
'cscrawler',
'daviesbot',
'daypopbot',
+'dipsie\.bot',
+'domainsdb\.net',
'exactseek',
+'everbeecrawler',
'ezresult',
'feedburner',
+'feedfetcher\-google',
'feedster',
+'gaisbot',
+'geniebot',
'gigabot',
+'girafabot',
'gnodspider',
'grub',
'henrythemiragorobot',
@@ -360,37 +418,50 @@
'msiecrawler',
'nagios',
'newsgatoronline',
+'nutch',
'perman',
+'pluckfeedcrawler',
'pompos',
'popdexter',
'rambler',
'redalert',
'rojo',
+'rssimagesbot',
'shoutcast',
'slysearch',
'surveybot',
'syndic8',
'technoratibot',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
+'topicblogs',
'turnitinbot',
'turtlescanner', # Must be before turtle
'turtle',
'ultraseek',
+'w3c\-checklink',
+'w3c_css_validator_jfouffa',
'w3c_validator',
'webclipping\.com',
'webcompass',
'wonderer',
+'yacy',
+'yahoo\-blogs',
'yahoo\-verticalcrawler',
'yahoofeedseeker',
+'yahooseeker\-testing',
+'yahooseeker',
'yandex',
'zealbot',
-'zyborg'
+'zyborg',
+'ng\/1\.', # put at end to avoid false positive
+'ng\/2\.' # put at end to avoid false positive
);
@RobotsSearchIDOrder_listgen = (
# Generic robot
'robot',
'crawl',
'spider',
-'\wbot[\/\-]',
+'\wbot[\/\-]'
);
@@ -400,12 +471,12 @@
#-------------------------------------------------------
%RobotsHashIDLib = (
# Common robots (In robot file)
-'appie','Walhello appie',
+'appie','Walhello appie',
'architext','ArchitextSpider',
-'jeeves','AskJeeves',
+'jeeves','AskJeeves',
'bjaaland','Bjaaland',
'ferret','Wild Ferret Web Hopper #1, #2, #3',
-'googlebot','Googlebot',
+'googlebot','Googlebot',
'gulliver','Northern Light Gulliver',
'harvest','Harvest',
'htdig','ht://Dig',
@@ -416,7 +487,7 @@
'myweb','Internet Shinchakubin',
'nomad','Nomad',
'scooter','Scooter',
-'slurp','Yahoo Slurp',
+'slurp','Yahoo Slurp',
'^voyager\/','Voyager',
'weblayers','weblayers',
# Common robots (Not in robot file)
@@ -424,11 +495,12 @@
'digout4u','Digout4u',
'echo','EchO!',
'fast\-webcrawler','Fast-Webcrawler',
-'ia_archiver','Alexa (IA Archiver)',
+'ia_archiver\-web\.archive\.org','The web archive (IA Archiver)',
+'ia_archiver','Alexa (IA Archiver)',
'jennybot','JennyBot',
'mercator','Mercator',
-'msnbot','MSNBot',
-'netcraft','Netcraft',
+'msnbot','MSNBot',
+'netcraft','Netcraft',
'petersnews','Petersnews',
'unlost_web_crawler','Unlost Web Crawler',
'voila','Voila',
@@ -446,6 +518,7 @@
'araneo','Araneo',
'aretha','Aretha',
'ariadne','ARIADNE',
+'powermarks','Powermarks', # must come before Arks; seen used by referrer spam
'arks','arks',
'aspider','ASpider (Associative Spider)',
'atn\.txt','ATN Worldwide',
@@ -499,7 +572,7 @@
'felix','Felix IDE',
'fetchrover','FetchRover',
'fido','fido',
-'finnish','Hämähäkki',
+'finnish','Hämähäkki',
'fireball','KIT-Fireball',
'fouineur','Fouineur',
'francoroute','Robot Francoroute',
@@ -547,7 +620,7 @@
'kilroy','Kilroy',
'ko_yappo_robot','KO_Yappo_Robot',
'labelgrabber\.txt','LabelGrabber',
-'larbin','larbin',
+'larbin','larbin',
'legs','legs',
'linkidator','Link Validator',
'linkscan','LinkScan',
@@ -570,7 +643,7 @@
'ndspider','NDSpider',
'nederland\.zoek','Nederland.zoek',
'netcarta','NetCarta WebMap Engine',
-'netmechanic','NetMechanic',
+'netmechanic','NetMechanic',
'netscoop','NetScoop',
'newscan\-online','newscan-online',
'nhse','NHSE Web Forager',
@@ -598,8 +671,8 @@
'plumtreewebaccessor','PlumtreeWebAccessor',
'poppi','Poppi',
'portalb','PortalB Spider',
-'psbot','psbot',
-'python','The Python Robot',
+'psbot','psbot',
+'python','Python-urllib',
'raven','Raven Search',
'rbse','RBSE Spider',
'resumerobot','Resume Robot',
@@ -637,6 +710,7 @@
'spiderview','SpiderView(tm)',
'spry','Spry Wizard Robot',
'ssearcher','Site Searcher',
+'sqworm','Sqworm',
'suke','Suke',
'suntek','suntek search engine',
'sven','Sven',
@@ -696,19 +770,31 @@
'asterias', 'Asterias',
'awbot', 'AWBot',
'baiduspider','BaiDuSpider',
-'becomebot', 'BecomeBot',
-'bloglines','Bloglines',
+'becomebot', 'BecomeBot',
+'bender','bender focused_crawler',
+'bloglines','Bloglines',
+'blogpulse','BlogPulse ISSpider intelliseek.com',
+'blogshares','Blogshares Spiders',
+'blogslive','Blogslive',
'bobby', 'Bobby',
'boris', 'Boris',
'bumblebee', 'Bumblebee (relevare.com)',
+'converacrawler','ConveraCrawler',
'cscrawler','CsCrawler',
'daviesbot', 'DaviesBot',
'daypopbot', 'DayPop',
+'dipsie\.bot','Dipsie',
+'domainsdb\.net','DomainsDB.net',
'exactseek','ExactSeek Crawler',
-'ezresult', 'Ezresult',
+'everbeecrawler','EverbeeCrawler',
+'ezresult', 'Ezresult',
'feedburner', 'Feedburner',
-'feedster', 'Feedster',
+'feedfetcher\-google','Feedfetcher-Google',
+'feedster', 'Feedster',
+'geniebot','geniebot',
+'gaisbot','Gaisbot',
'gigabot','GigaBot',
+'girafabot','Girafabot',
'gnodspider','GNOD Spider',
'grub','Grub.org',
'henrythemiragorobot', 'Mirago',
@@ -718,40 +804,52 @@
'kinjabot', 'Kinjabot',
'kinja\-imagebot', 'Kinja Imagebot',
'linkbot','LinkBot',
-'linkchecker','LinkChecker',
+'linkchecker','LinkChecker',
'livejournal\.com', 'LiveJournal.com',
'magpierss', 'MagpieRSS',
-'mediapartners\-google','Google AdSense',
+'mediapartners\-google','Google AdSense',
'metager\-linkchecker','MetaGer LinkChecker',
'microsoft_url_control','Microsoft URL Control',
'nagios','Nagios',
'newsgatoronline', 'NewsGator Online',
+'nutch','Nutch',
'msiecrawler','MSIECrawler',
'perman', 'Perman surfer',
+'pluckfeedcrawler','PluckFeedCrawler',
'pompos','Pompos',
'popdexter','Popdexter',
'rambler', 'StackRambler',
'redalert', 'Red Alert',
-'rojo', 'RoJo',
+'rojo', 'RoJo aggregator',
+'rssimagesbot','rssImagesBot',
'shoutcast','Shoutcast Directory Service',
'slysearch','SlySearch',
'surveybot','SurveyBot',
'syndic8','Syndic8',
'technoratibot', 'Technoratibot',
+'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','T-H-U-N-D-E-R-S-T-O-N-E',
+'topicblogs', 'topicblogs',
'turnitinbot','Turn It In',
'turtle', 'Turtle',
'turtlescanner', 'Turtle',
'ultraseek', 'Ultraseek',
-'w3c_validator', 'W3C Validator',
+'w3c\-checklink','W3C Link Checker',
+'w3c_css_validator_jfouffa', 'W3C jigsaw CSS Validator',
+'w3c_validator', 'W3C Validator',
'webclipping\.com', 'WebClipping.com',
'webcompass', 'webcompass',
'wonderer', 'Web Wombat Redback Spider',
+'yacy','yacy',
+'yahoo\-blogs','Yahoo-Blogs',
'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler',
-'yahoofeedseeker', 'Yahoo Feed Seeker',
+'yahoofeedseeker', 'Yahoo Feed Seeker',
+'yahooseeker\-testing', 'YahooSeeker-Testing',
+'yahooseeker', 'YahooSeeker Yahoo! Blog crawler',
'yandex', 'Yandex bot',
'zealbot','ZealBot',
'zyborg','Zyborg',
-
+'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive
+'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive
# Generic root ID
'robot', 'Unknown robot (identified by \'robot\')',
'crawl', 'Unknown robot (identified by \'crawl\')',
@@ -769,12 +867,11 @@
'fast\-webcrawler'=>'AllTheWeb',
'googlebot'=>'Google',
'msnbot'=>'MSN',
+'nutch'=>'Looksmart',
'scooter'=>'AltaVista',
'wisenutbot'=>'Looksmart',
'yahoo\-verticalcrawler'=>'Yahoo',
'zyborg'=>'Looksmart'
);
-
-
1;
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm
index 31824e08..9f7907a4 100644
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -7,6 +7,26 @@
#------------------------------------------------------------------------------
# $Revision$ - $Author$ - $Date$
+# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html:
+# added minor italian search engines
+# arianna http://arianna.libero.it/
+# supereva http://search.supereva.com/
+# kataweb http://kataweb.it/
+# corrected uk looksmart
+# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
+# to
+# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
+# corrected spelling
+# internationnal -> international
+# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to
+# avoid counting gmail referrals as search engine traffic
+# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html:
+# avoid counting babelfish.altavista referrals as search engine traffic
+# avoid counting translate.google referrals as search engine traffic
+# 2005-11-20 Sean Carlos
+# added missing 'tiscali','key=', entry. Check order
+# 2005-11-22 Sean Carlos
+# added Google Base & Froogle. Froogle not tested.
#package AWSSE;
@@ -19,7 +39,9 @@
# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
#------------------------------------------------------------------------------
@SearchEnginesSearchIDOrder_list1=(
-# Major internationnal search engines
+# Major international search engines
+'base\.google\.',
+'froogle\.google\.',
'images\.google\.',
'google\.','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)',
'msn\.',
@@ -41,7 +63,7 @@
);
@SearchEnginesSearchIDOrder_list2=(
-# Minor internationnal search engines
+# Minor international search engines
'northernlight\.',
'hotbot\.',
'kvasir\.',
@@ -92,7 +114,7 @@
# Minor hungarian search engines
'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu',
# Minor italian search engines
-'virgilio\.it',
+'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it',
# Minor norvegian search engines
'sok\.start\.no',
# Minor polish search engines
@@ -115,6 +137,9 @@
# really a search engine
#------------------------------------------------------------------------------
%NotSearchEnginesKeys=(
+'altavista\.'=>'babelfish\.altavista\.',
+# One entry per key: a repeated hash key keeps only its last value, so the gmail and translate exclusions share one pattern
+'google\.'=>'(mail|translate)\.google\.',
'msn\.'=>'hotmail\.msn\.',
'yahoo\.'=>'mail\.yahoo\.'
);
@@ -124,7 +149,9 @@
# Each Search Engine Search ID is associated to an AWStats id string
#------------------------------------------------------------------------------
%SearchEnginesHashID = (
-# Major internationnal search engines
+# Major international search engines
+'base\.google\.','google_base',
+'froogle\.google\.','google_froogle',
'images\.google\.','google_image',
'google\.','google','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)','google',
'msn\.','msn',
@@ -143,7 +170,7 @@
'search\.aol\.co','aol',
'search\.sli\.sympatico\.ca','sympatico',
'excite\.','excite',
-# Minor internationnal search engines
+# Minor international search engines
'northernlight\.','northernlight',
'hotbot\.','hotbot',
'kvasir\.','kvasir',
@@ -244,6 +271,9 @@
'search\.internetto\.hu','internetto',
# Minor italian search engines
'virgilio\.it','virgilio',
+'arianna\.libero\.it','arianna',
+'supereva\.com','supereva',
+'kataweb\.it','kataweb',
# Minor norvegian search engines
'sok\.start\.no','start',
# Minor polish search engines
@@ -281,11 +311,14 @@
'altavista','q=',
'a9','a9\.com\/',
'dmoz','search=',
-'google','(p|q|as_p|as_q)=',
+'google_base','(p|q|as_p|as_q)=',
+'google_froogle','(p|q|as_p|as_q)=',
'google_image','(p|q|as_p|as_q)=',
+'google','(p|q|as_p|as_q)=',
'lycos','query=',
'msn','q=',
'netscape','search=',
+'tiscali','key=',
'aol','query=',
'terra','query=',
'voila','kw=',
@@ -293,7 +326,7 @@
'yahoo','p=',
'sympatico', 'query=',
'excite','search=',
-# Minor internationnal search engines
+# Minor international search engines
'go','qt=',
'ask','ask=',
'atomz','sp-q=',
@@ -313,6 +346,9 @@
'spray','string=',
'teoma','q=',
'virgilio','qs=',
+'arianna','query=',
+'supereva','q=',
+'kataweb','q=',
'webcrawler','searchText=',
'wisenut','query=',
'ixquick', 'query=',
@@ -329,7 +365,7 @@
# Minor dutch search engines
'ilse','search_for=', 'vindex','in=',
# Minor english search engines
-'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=',
+'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=',
'mirago','txtsearch=', 'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=',
# Minor finnish search engines
'haku','w=',
@@ -385,14 +421,16 @@
# 'search_engine_id', 'search_engine_name',
#------------------------------------------------------------------------------
%SearchEnginesHashLib=(
-# Major internationnal search engines
+# Major international search engines
'alexa','Alexa',
'alltheweb','AllTheWeb',
'altavista','AltaVista',
'a9', 'A9.com',
'dmoz','DMOZ',
-'google','Google',
+'google_base','Google (Base)',
+'google_froogle','Froogle (Google)',
'google_image','Google (Images)',
+'google','Google',
'lycos','Lycos',
'msn','MSN',
'netscape','Netscape',
@@ -404,7 +442,7 @@
'yahoo','Yahoo',
'sympatico', 'Sympatico',
'excite','Excite',
-# Minor internationnal search engines
+# Minor international search engines
'go','Go.com',
'ask','Ask Jeeves',
'atomz','Atomz',
@@ -457,7 +495,7 @@
# Minor hungarian search engines
'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkeresõ', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Keresõ',
# Minor italian search engines
-'virgilio','Virgilio',
+'virgilio','Virgilio', 'arianna','Arianna', 'supereva','Supereva', 'kataweb','Kataweb',
# Minor norvegian search engines
'start','start.no',
# Minor polish search engines