From: eldy <> Date: Wed, 23 Nov 2005 20:50:30 +0000 (+0000) Subject: Detect linux and bsd distributions and groups them by families X-Git-Tag: AWSTATS_6_5_BETA_2~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=962a816c27a166cd877cf3bee44b423b73df0961;p=thirdparty%2FAWStats.git Detect linux and bsd distributions and groups them by families --- diff --git a/wwwroot/cgi-bin/lib/operating_systems.pm b/wwwroot/cgi-bin/lib/operating_systems.pm index f9e597a0..2cfb1f9f 100644 --- a/wwwroot/cgi-bin/lib/operating_systems.pm +++ b/wwwroot/cgi-bin/lib/operating_systems.pm @@ -5,6 +5,12 @@ #------------------------------------------------------- # $Revision$ - $Author$ - $Date$ +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html: +# - added specific Linux distributions in addition to +# the generic Linux. Requires modifications to awstats.pl +# if you want to see Linux as a Group in the main report. +# Included documentation link to Distribution home pages. +# - added links for each operating systems. 
#package AWSOS; @@ -33,19 +39,30 @@ 'mac[_+ ]68', # This works for mac_6800 and mac_68k 'macweb', 'macintosh', -# Unix like OS +# Linux family +'linux(.*)centos', +'linux(.*)debian', +'linux(.*)fedora', +'linux(.*)mandr', +'linux(.*)red[_+ ]hat', +'linux(.*)suse', +'linux(.*)ubuntu', 'linux', +# Hurd family +'gnu.hurd', +# BSDs family +'bsdi', +'freebsd', +'openbsd', +'netbsd', +# Other Unix, Unix-like 'aix', 'sunos', 'irix', 'osf', -'hp-ux', -'netbsd', -'bsdi', -'freebsd', -'openbsd', -'gnu.hurd', -'unix','x11', +'hp\-ux', +'unix', +'x11', # Other famous OS 'beos', 'os/2', @@ -81,19 +98,30 @@ # Macintosh OS family 'mac[_+ ]os[_+ ]x','macosx', 'mac[_+ ]?p','macintosh','mac[_+ ]68','macintosh','macweb','macintosh','macintosh','macintosh', -# Unix like OS +# Linux family (linuxyyy) +'linux(.*)centos','linuxcentos', +'linux(.*)debian','linuxdebian', +'linux(.*)fedora','linuxfedora', +'linux(.*)mandr','linuxmandr', +'linux(.*)red[_+ ]hat','linuxredhat', +'linux(.*)suse','linuxsuse', +'linux(.*)ubuntu','linuxubuntu', 'linux','linux', +# Hurd family +'gnu.hurd','gnu', +# BSDs family (bsdyyy) +'netbsd','bsdnetbsd', +'bsdi','bsdi', +'freebsd','bsdfreebsd', +'openbsd','bsdopenbsd', +# Other Unix, Unix-like 'aix','aix', 'sunos','sunos', 'irix','irix', 'osf','osf', -'hp-ux','hp-ux', -'netbsd','netbsd', -'bsdi','bsdi', -'freebsd','freebsd', -'openbsd','openbsd', -'gnu.hurd','gnu', -'unix','unix','x11','unix', +'hp\-ux','hp\-ux', +'unix','unix', +'x11','unix', # Other famous OS 'beos','beos', 'os/2','os/2', @@ -127,18 +155,31 @@ # Macintosh OS 'macosx','Mac OS X', 'macintosh','Mac OS', -# Unix like OS -'linux','GNU Linux', +# Linux +'linuxcentos','Centos', +'linuxdebian','Debian', +'linuxfedora','Fedora', +'linuxmandr','Mandriva (or Mandrake)', +'linuxredhat','Red Hat', +'linuxsuse','Suse', +'linuxubuntu','Ubuntu', +'linux','GNU Linux (Unknown or unspecified distribution)', +# Hurd +'gnu','GNU Hurd', +# BSDs +'netbsd','NetBSD', # For backward compatibility +'bsdnetbsd','NetBSD', 
+'bsdi','BSDi', +'freebsd','FreeBSD', # For backward compatibility +'bsdfreebsd','FreeBSD', +'openbsd','OpenBSD', # For backward compatibility +'bsdopenbsd','OpenBSD', +# Other Unix, Unix-like 'aix','Aix', 'sunos','Sun Solaris', 'irix','Irix', 'osf','OSF Unix', -'hp-ux','HP Unix', -'netbsd','NetBSD', -'bsdi','BSDi', -'freebsd','FreeBSD', -'openbsd','OpenBSD', -'gnu','GNU Hurd', +'hp\-ux','HP Unix', 'unix','Unknown Unix system', # Other famous OS 'beos','BeOS', diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm index 09c36e9d..2feebac1 100644 --- a/wwwroot/cgi-bin/lib/robots.pm +++ b/wwwroot/cgi-bin/lib/robots.pm @@ -5,16 +5,59 @@ #------------------------------------------------------- # $Revision$ - $Author$ - $Date$ +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html: +# added dipsie (not tested with real data). +# added DomainsDB.net http://domainsdb.net/ +# added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic) +# added Nutch (used by looksmart (furl?)) +# added rssImagesBot +# added Sqworm +# added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e +# added w3c css-validator +# added documentation link to bot home pages for above and selected major bots. +# In the case of international bots, choose .com page. +# Included tool tip (html "title"). +# To do: parameterize to match both AWStats language and tooltips settings. +# To do: add html links for all bots based on current documentation in source +# files referenced below. 
+# changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma) +# made minor grammar corrections to notes below +# 2005-08-24 added YahooSeeker-Testing +# added w3c-checklink +# updated url for ask.com +# 2005-08-24 added Girafabot http://www.girafa.com/ +# 2005-08-30 added PluckFeedCrawler http://www.pluck.com/ +# added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; ) +# added geniebot (wgao@genieknows.com) +# added BecomeBot link http://www.become.com/site_owners.html +# added topicblogs http://www.topicblogs.com/ +# added Powermarks; seen used by referrer spam +# added YahooSeeker +# added NG/2. http://www.exabot.com/ +# 2005-09-15 added link for Walhello appie +# added bender focused_crawler +# updated YahooSeeker description (blog crawler) +# 2005-09-16 added link for http://linkchecker.sourceforge.net +# added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl) +# added Blogslive info@blogslive.com intelliseek.com +# added BlogPulse (ISSpider-3.0) intelliseek.com +# 2005-09-26 added Feedfetcher-Google (http://www.google.com/feedfetcher.html) +# added EverbeeCrawler +# added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html +# added link for Bloglines http://www.bloglines.com +# 2005-10-19 fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html) +# added Blogshares Spiders (Synchronized V1.5.1) +# added yacy #package AWSROB; # Robots list was found at http://www.robotstxt.org/wc/active/all.txt # Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html -# Rem: To avoid bad detection, some robots id were removed from this list: +# Rem: To avoid bad detection, some robot IDs were removed from this list: # - Robots with ID of 3 letters only -# - Robot called 'webs' and 'tcl' -# Rem: Some robot most used for download are also removed: wget +# - Robots called 'webs' and 'tcl' +# Rem: Some robots mostly used for downloading have also been removed, i.e. 
wget # Rem: directhit changed into direct_hit (its real id) # Rem: calif changed into calif[^r] to avoid confusion between Tiscalifreenet browser # Rem: fish changed into [^a]fish to avoid confusion between Madsafish browser @@ -25,7 +68,7 @@ # RobotsSearchIDOrder # It contains all matching criteria to search for in log fields. This list is # used to know in which order to search Robot IDs. -# Most frequent one are in list1, used when LevelForRobotsDetection is 1 or more +# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more # Minor robots are in list2, used when LevelForRobotsDetection is 2 or more # Note: Robots IDs are in lower case, ' ' and '+' are changed into '_' and are quoted. #------------------------------------------------------- @@ -55,6 +98,7 @@ 'digout4u', 'echo', 'fast\-webcrawler', +'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa 'ia_archiver', 'jennybot', 'mercator', @@ -79,6 +123,7 @@ 'araneo', 'aretha', 'ariadne', +'powermarks', 'arks', 'aspider', 'atn\.txt', @@ -269,6 +314,7 @@ 'spiderman', 'spiderview', 'spry', +'sqworm', 'ssearcher', 'suke', 'suntek', @@ -330,18 +376,30 @@ 'awbot', 'baiduspider', 'becomebot', +'bender', 'bloglines', +'blogpulse', +'blogshares', +'blogslive', 'bobby', 'boris', 'bumblebee', +'converacrawler', 'cscrawler', 'daviesbot', 'daypopbot', +'dipsie\.bot', +'domainsdb\.net', 'exactseek', +'everbeecrawler', 'ezresult', 'feedburner', +'feedfetcher\-google', 'feedster', +'gaisbot', +'geniebot', 'gigabot', +'girafabot', 'gnodspider', 'grub', 'henrythemiragorobot', @@ -360,37 +418,50 @@ 'msiecrawler', 'nagios', 'newsgatoronline', +'nutch', 'perman', +'pluckfeedcrawler', 'pompos', 'popdexter', 'rambler', 'redalert', 'rojo', +'rssimagesbot', 'shoutcast', 'slysearch', 'surveybot', 'syndic8', 'technoratibot', +'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e', +'topicblogs', 'turnitinbot', 'turtlescanner', # Must be before turtle 'turtle', 'ultraseek', +'w3c\-checklink', 
+'w3c_css_validator_jfouffa', 'w3c_validator', 'webclipping\.com', 'webcompass', 'wonderer', +'yacy', +'yahoo\-blogs', 'yahoo\-verticalcrawler', 'yahoofeedseeker', +'yahooseeker\-testing', +'yahooseeker', 'yandex', 'zealbot', -'zyborg' +'zyborg', +'ng\/1\.', # put at end to avoid false positive +'ng\/2\.' # put at end to avoid false positive ); @RobotsSearchIDOrder_listgen = ( # Generic robot 'robot', 'crawl', 'spider', -'\wbot[\/\-]', +'\wbot[\/\-]' ); @@ -400,12 +471,12 @@ #------------------------------------------------------- %RobotsHashIDLib = ( # Common robots (In robot file) -'appie','Walhello appie', +'appie','Walhello appie', 'architext','ArchitextSpider', -'jeeves','AskJeeves', +'jeeves','AskJeeves', 'bjaaland','Bjaaland', 'ferret','Wild Ferret Web Hopper #1, #2, #3', -'googlebot','Googlebot', +'googlebot','Googlebot', 'gulliver','Northern Light Gulliver', 'harvest','Harvest', 'htdig','ht://Dig', @@ -416,7 +487,7 @@ 'myweb','Internet Shinchakubin', 'nomad','Nomad', 'scooter','Scooter', -'slurp','Yahoo Slurp', +'slurp','Yahoo Slurp', '^voyager\/','Voyager', 'weblayers','weblayers', # Common robots (Not in robot file) @@ -424,11 +495,12 @@ 'digout4u','Digout4u', 'echo','EchO!', 'fast\-webcrawler','Fast-Webcrawler', -'ia_archiver','Alexa (IA Archiver)', +'ia_archiver\-web\.archive\.org','The web archive (IA Archiver)', +'ia_archiver','Alexa (IA Archiver)', 'jennybot','JennyBot', 'mercator','Mercator', -'msnbot','MSNBot', -'netcraft','Netcraft', +'msnbot','MSNBot', +'netcraft','Netcraft', 'petersnews','Petersnews', 'unlost_web_crawler','Unlost Web Crawler', 'voila','Voila', @@ -446,6 +518,7 @@ 'araneo','Araneo', 'aretha','Aretha', 'ariadne','ARIADNE', +'powermarks','Powermarks', # must come before Arks; seen used by referrer spam 'arks','arks', 'aspider','ASpider (Associative Spider)', 'atn\.txt','ATN Worldwide', @@ -499,7 +572,7 @@ 'felix','Felix IDE', 'fetchrover','FetchRover', 'fido','fido', -'finnish','Hämähäkki', +'finnish','H���ki', 
'fireball','KIT-Fireball', 'fouineur','Fouineur', 'francoroute','Robot Francoroute', @@ -547,7 +620,7 @@ 'kilroy','Kilroy', 'ko_yappo_robot','KO_Yappo_Robot', 'labelgrabber\.txt','LabelGrabber', -'larbin','larbin', +'larbin','larbin', 'legs','legs', 'linkidator','Link Validator', 'linkscan','LinkScan', @@ -570,7 +643,7 @@ 'ndspider','NDSpider', 'nederland\.zoek','Nederland.zoek', 'netcarta','NetCarta WebMap Engine', -'netmechanic','NetMechanic', +'netmechanic','NetMechanic', 'netscoop','NetScoop', 'newscan\-online','newscan-online', 'nhse','NHSE Web Forager', @@ -598,8 +671,8 @@ 'plumtreewebaccessor','PlumtreeWebAccessor', 'poppi','Poppi', 'portalb','PortalB Spider', -'psbot','psbot', -'python','The Python Robot', +'psbot','psbot', +'python','Python-urllib', 'raven','Raven Search', 'rbse','RBSE Spider', 'resumerobot','Resume Robot', @@ -637,6 +710,7 @@ 'spiderview','SpiderView(tm)', 'spry','Spry Wizard Robot', 'ssearcher','Site Searcher', +'sqworm','Sqworm', 'suke','Suke', 'suntek','suntek search engine', 'sven','Sven', @@ -696,19 +770,31 @@ 'asterias', 'Asterias', 'awbot', 'AWBot', 'baiduspider','BaiDuSpider', -'becomebot', 'BecomeBot', -'bloglines','Bloglines', +'becomebot', 'BecomeBot', +'bender','bender focused_crawler', +'bloglines','Bloglines', +'blogpulse','BlogPulse ISSpider intelliseek.com', +'blogshares','Blogshares Spiders', +'blogslive','Blogslive', 'bobby', 'Bobby', 'boris', 'Boris', 'bumblebee', 'Bumblebee (relevare.com)', +'converacrawler','ConveraCrawler', 'cscrawler','CsCrawler', 'daviesbot', 'DaviesBot', 'daypopbot', 'DayPop', +'dipsie\.bot','Dipsie', +'domainsdb\.net','DomainsDB.net', 'exactseek','ExactSeek Crawler', -'ezresult', 'Ezresult', +'everbeecrawler','EverbeeCrawler', +'ezresult', 'Ezresult', 'feedburner', 'Feedburner', -'feedster', 'Feedster', +'feedfetcher\-google','Feedfetcher-Google', +'feedster', 'Feedster', +'geniebot','geniebot', +'gaisbot','Gaisbot', 'gigabot','GigaBot', +'girafabot','Girafabot', 'gnodspider','GNOD Spider', 
'grub','Grub.org', 'henrythemiragorobot', 'Mirago', @@ -718,40 +804,52 @@ 'kinjabot', 'Kinjabot', 'kinja\-imagebot', 'Kinja Imagebot', 'linkbot','LinkBot', -'linkchecker','LinkChecker', +'linkchecker','LinkChecker', 'livejournal\.com', 'LiveJournal.com', 'magpierss', 'MagpieRSS', -'mediapartners\-google','Google AdSense', +'mediapartners\-google','Google AdSense', 'metager\-linkchecker','MetaGer LinkChecker', 'microsoft_url_control','Microsoft URL Control', 'nagios','Nagios', 'newsgatoronline', 'NewsGator Online', +'nutch','Nutch', 'msiecrawler','MSIECrawler', 'perman', 'Perman surfer', +'pluckfeedcrawler','PluckFeedCrawler', 'pompos','Pompos', 'popdexter','Popdexter', 'rambler', 'StackRambler', 'redalert', 'Red Alert', -'rojo', 'RoJo', +'rojo', 'RoJo aggregator', +'rssimagesbot','rssImagesBot', 'shoutcast','Shoutcast Directory Service', 'slysearch','SlySearch', 'surveybot','SurveyBot', 'syndic8','Syndic8', 'technoratibot', 'Technoratibot', +'t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','T-H-U-N-D-E-R-S-T-O-N-E', +'topicblogs', 'topicblogs', 'turnitinbot','Turn It In', 'turtle', 'Turtle', 'turtlescanner', 'Turtle', 'ultraseek', 'Ultraseek', -'w3c_validator', 'W3C Validator', +'w3c\-checklink','W3C Link Checker', +'w3c_css_validator_jfouffa', 'W3C jigsaw CSS Validator', +'w3c_validator', 'W3C Validator', 'webclipping\.com', 'WebClipping.com', 'webcompass', 'webcompass', 'wonderer', 'Web Wombat Redback Spider', +'yacy','yacy', +'yahoo\-blogs','Yahoo-Blogs', 'yahoo\-verticalcrawler', 'Yahoo Vertical Crawler', -'yahoofeedseeker', 'Yahoo Feed Seeker', +'yahoofeedseeker', 'Yahoo Feed Seeker', +'yahooseeker\-testing', 'YahooSeeker-Testing', +'yahooseeker', 'YahooSeeker Yahoo! 
Blog crawler', 'yandex', 'Yandex bot', 'zealbot','ZealBot', 'zyborg','Zyborg', - +'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive +'ng\/2\.','NG 2.x (Exalead)', # put at end to avoid false positive # Generic root ID 'robot', 'Unknown robot (identified by \'robot\')', 'crawl', 'Unknown robot (identified by \'crawl\')', @@ -769,12 +867,11 @@ 'fast\-webcrawler'=>'AllTheWeb', 'googlebot'=>'Google', 'msnbot'=>'MSN', +'nutch'=>'Looksmart', 'scooter'=>'AltaVista', 'wisenutbot'=>'Looksmart', 'yahoo\-verticalcrawler'=>'Yahoo', 'zyborg'=>'Looksmart' ); - - 1; diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index 31824e08..9f7907a4 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -7,6 +7,26 @@ #------------------------------------------------------------------------------ # $Revision$ - $Author$ - $Date$ +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html: +# added minor italian search engines +# arianna http://arianna.libero.it/ +# supereva http://search.supereva.com/ +# kataweb http://kataweb.it/ +# corrected uk looksmart +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', +# to +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', +# corrected spelling +# internationnal -> international +# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to +# avoid counting gmail referrals as search engine traffic +# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html: +# avoid counting babelfish.altavista referrals as search engine traffic +# avoid counting translate.google referrals as search engine traffic +# 2005-11-20 Sean Carlos +# added missing 'tiscali','key=', entry. Check order +# 2005-11-22 Sean Carlos +# added Google Base & Froogle. Froogle not tested. 
#package AWSSE; @@ -19,7 +39,9 @@ # Note: Regex IDs are in lower case and ' ' and '+' are changed into '_' #------------------------------------------------------------------------------ @SearchEnginesSearchIDOrder_list1=( -# Major internationnal search engines +# Major international search engines +'base\.google\.', +'froogle\.google\.', 'images\.google\.', 'google\.','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)', 'msn\.', @@ -41,7 +63,7 @@ ); @SearchEnginesSearchIDOrder_list2=( -# Minor internationnal search engines +# Minor international search engines 'northernlight\.', 'hotbot\.', 'kvasir\.', @@ -92,7 +114,7 @@ # Minor hungarian search engines 'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', # Minor italian search engines -'virgilio\.it', +'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it', # Minor norvegian search engines 'sok\.start\.no', # Minor polish search engines @@ -115,6 +137,9 @@ # really a search engine #------------------------------------------------------------------------------ %NotSearchEnginesKeys=( +'altavista\.'=>'babelfish\.altavista\.', +'google\.'=>'mail\.google\.', +'google\.'=>'translate\.google\.', 'msn\.'=>'hotmail\.msn\.', 'yahoo\.'=>'mail\.yahoo\.' 
); @@ -124,7 +149,9 @@ # Each Search Engine Search ID is associated to an AWStats id string #------------------------------------------------------------------------------ %SearchEnginesHashID = ( -# Major internationnal search engines +# Major international search engines +'base\.google\.','google_base', +'froogle\.google\.','google_froogle', 'images\.google\.','google_image', 'google\.','google','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)','google', 'msn\.','msn', @@ -143,7 +170,7 @@ 'search\.aol\.co','aol', 'search\.sli\.sympatico\.ca','sympatico', 'excite\.','excite', -# Minor internationnal search engines +# Minor international search engines 'northernlight\.','northernlight', 'hotbot\.','hotbot', 'kvasir\.','kvasir', @@ -244,6 +271,9 @@ 'search\.internetto\.hu','internetto', # Minor italian search engines 'virgilio\.it','virgilio', +'arianna\.libero\.it','arianna', +'supereva\.com','supereva', +'kataweb\.it','kataweb', # Minor norvegian search engines 'sok\.start\.no','start', # Minor polish search engines @@ -281,11 +311,14 @@ 'altavista','q=', 'a9','a9\.com\/', 'dmoz','search=', -'google','(p|q|as_p|as_q)=', +'google_base','(p|q|as_p|as_q)=', +'google_froogle','(p|q|as_p|as_q)=', 'google_image','(p|q|as_p|as_q)=', +'google','(p|q|as_p|as_q)=', 'lycos','query=', 'msn','q=', 'netscape','search=', +'tiscali','key=', 'aol','query=', 'terra','query=', 'voila','kw=', @@ -293,7 +326,7 @@ 'yahoo','p=', 'sympatico', 'query=', 'excite','search=', -# Minor internationnal search engines +# Minor international search engines 'go','qt=', 'ask','ask=', 'atomz','sp-q=', @@ -313,6 +346,9 @@ 'spray','string=', 'teoma','q=', 'virgilio','qs=', +'arianna','query=', +'supereva','q=', +'kataweb','q=', 'webcrawler','searchText=', 'wisenut','query=', 'ixquick', 'query=', @@ -329,7 +365,7 @@ # Minor dutch search engines 'ilse','search_for=', 'vindex','in=', # Minor english search engines -'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', 
+'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', 'mirago','txtsearch=', 'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', # Minor finnish search engines 'haku','w=', @@ -385,14 +421,16 @@ # 'search_engine_id', 'search_engine_name', #------------------------------------------------------------------------------ %SearchEnginesHashLib=( -# Major internationnal search engines +# Major international search engines 'alexa','Alexa', 'alltheweb','AllTheWeb', 'altavista','AltaVista', 'a9', 'A9.com', 'dmoz','DMOZ', -'google','Google', +'google_base','Google (Base)', +'google_froogle','Froogle (Google)', 'google_image','Google (Images)', +'google','Google', 'lycos','Lycos', 'msn','MSN', 'netscape','Netscape', @@ -404,7 +442,7 @@ 'yahoo','Yahoo', 'sympatico', 'Sympatico', 'excite','Excite', -# Minor internationnal search engines +# Minor international search engines 'go','Go.com', 'ask','Ask Jeeves', 'atomz','Atomz', @@ -457,7 +495,7 @@ # Minor hungarian search engines 'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkeresõ', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Keresõ', # Minor italian search engines -'virgilio','Virgilio', +'virgilio','Virgilio', 'arianna','Arianna', 'supereva','Supereva', 'kataweb','Kataweb', # Minor norvegian search engines 'start','start.no', # Minor polish search engines