From: eldy <> Date: Thu, 25 May 2006 00:18:32 +0000 (+0000) Subject: Update search engine database X-Git-Tag: AWSTATS_6_6_BETA~38 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b846ca8759c680101ccdefab8552916d41adfb4;p=thirdparty%2FAWStats.git Update search engine database --- diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index b6f4a2f0..95172510 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -20,13 +20,55 @@ # internationnal -> international # added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to # avoid counting gmail referrals as search engine traffic -# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html: +# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html # avoid counting babelfish.altavista referrals as search engine traffic # avoid counting translate.google referrals as search engine traffic # 2005-11-20 Sean Carlos # added missing 'tiscali','key=', entry. Check order # 2005-11-22 Sean Carlos # added Google Base & Froogle. Froogle not tested. 
+# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html +# added biglotron.com (France) +# added blingo http://www.blingo.com/ +# added Clusty & Vivisimo +# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] +# added GPU p2p search http://search.centraldatabase.org/ +# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] +# added Ask group's "mysearch" +# added sify.com (India) +# added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603] +# Ask changes: +# - added Ask Japan (ask.jp) +# - break out Ask new country level variants (DE, ES, FR, IT, NL) +# - updated Ask name from Ask Jeeves +# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] +# - updated Ask uk (new uk.ask.com added to older ask.co.uk) +# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] +# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch +# to AWStats to allow untranslated html. Otherwise html will appear instead of link. +# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer +# exists https://sourceforge.net/forum/message.php?msg_id=3025426 +# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html +# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) +# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 +# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 +# To do: add more extensive IP list; keywords not yet detected. +# added icerocket.com blog search http://www.icerocket.com/ +# added live.com (msn) http://www.live.com/ +# added meta search engine Kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the +# search engine list but the actual search words are not available. +# added netluchs.de http://www.netluchs.de/ +# added sphere.com blog search http://www.sphere.com/ +# added wwweasel.de http://wwweasel.de +# added Yahoo Mindset! http://mindset.research.yahoo.com/ +# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) +# 2006-05-13 +# added Google cache IPs 64.233.183.104 & 66.102.7.104 +# 2006-05-20 +# anzwers.com.au +# schoenerbrausen.de http://www.schoenerbrausen.de/ +# 216.239.59.104 +# answerbus http://www.answerbus.com/ #package AWSSE; @@ -43,9 +85,15 @@ 'base\.google\.', 'froogle\.google\.', 'images\.google\.', -'google\.','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)', +'google\.','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100|59\.104)', +'64\.233\.183\.104', +'66\.102\.7\.104', +'66\.249\.93\.104', +'72\.14\.(203\.104|207\.104)', 'msn\.', +'live\.com', 'voila\.', +'mindset\.research\.yahoo', 'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135)', 'search\.aol\.co', 'tiscali\.', @@ -75,6 +123,12 @@ 'looksmart\.', 'spray\.', 'nbci\.com/search', +'de\.ask\.com', # break out Ask country specific engines. 
(.jp is in Japan section) +'es\.ask\.com', +'fr\.ask\.com', +'it\.ask\.com', +'nl\.ask\.com', +'uk\.ask\.com', '(^|\.)ask\.com', 'atomz\.', 'overture\.com', # Replace 'goto\.com','Goto.com', @@ -88,10 +142,46 @@ 'ixquick\.com', 'search\.earthlink\.net', 'i-une\.com', +'blingo\.com', +'centraldatabase\.org', +'clusty\.com', +'mysearch\.', +'vivisimo\.com', +'kartoo\.com', +'icerocket\.com', +'sphere\.com', +# Chello Portals +'chello\.at', +'chello\.be', +'chello\.cz', +'chello\.fr', +'chello\.hu', +'chello\.nl', +'chello\.no', +'chello\.pl', +'chello\.se', +'chello\.sk', +'chello', # required as catchall for new countries not yet known +# Mirago +'mirago\.be', +'mirago\.ch', +'mirago\.de', +'mirago\.dk', +'es\.mirago\.com', +'mirago\.fr', +'mirago\.it', +'mirago\.nl', +'no\.mirago\.com', +'mirago\.se', +'mirago\.co\.uk', +'mirago', # required as catchall for new countries not yet known +'answerbus\.com', +# Minor Australian search engines +'anzwers\.com\.au', # Minor brazilian search engines 'engine\.exe', 'miner\.bol\.com\.br', # Minor chinese search engines -'baidu\.com','search\.sina\.com','search\.sohu\.com', +'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com', # Minor czech search engines 'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz', # Minor danish search-engines @@ -99,24 +189,31 @@ # Minor dutch search engines 'ilse\.','vindex\.', # Minor english search engines -'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','mirago\.','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', +'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', # Minor finnish search engines 'haku\.www\.fi', # Minor french search engines 'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 
'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', -'toile\.com', +'toile\.com', 'biglotron\.com', # Minor german search engines 'sucheaol\.aol\.de', 'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de', 'suchen\.abacho\.de','brisbane\.t-online\.de','allesklar\.de','meinestadt\.de', '212\.227\.33\.241', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', +'wwweasel\.de', +'netluchs\.de', +'schoenerbrausen\.de', # Minor hungarian search engines 'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', +# Minor Indian search engines +'sify\.com', # Minor italian search engines -'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it', -# Minor norvegian search engines -'sok\.start\.no', +'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master', +# Minor Japanese search engines +'ask\.jp', +# Minor Norwegian search engines +'sok\.start\.no', 'eniro\.no', # Minor polish search engines 'szukaj\.wp\.pl', # Minor russian search engines @@ -141,6 +238,7 @@ 'google\.'=>'mail\.google\.', 'google\.'=>'translate\.google\.', 'msn\.'=>'hotmail\.msn\.', +'tiscali\.'=>'mail\.tiscali\.', 'yahoo\.'=>'mail\.yahoo\.' 
); @@ -153,9 +251,15 @@ 'base\.google\.','google_base', 'froogle\.google\.','google_froogle', 'images\.google\.','google_image', -'google\.','google','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100)','google', +'google\.','google','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100|59\.104)','google', +'64\.233\.183\.104','google_cache', +'66\.102\.7\.104','google_cache', +'66\.249\.93\.104','google_cache', +'72\.14\.(203\.104|207\.104)','google_cache', 'msn\.','msn', +'live\.com','live', 'voila\.','voila', +'mindset\.research\.yahoo','yahoo_mindset', 'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135)','yahoo', 'lycos\.','lycos', 'alexa\.com','alexa', @@ -182,6 +286,13 @@ 'looksmart\.','looksmart', 'spray\.','spray', 'nbci\.com/search','nbci', +'de\.ask\.com','askde', # break out Ask country specific engines. +'es\.ask\.com','askes', +'fr\.ask\.com','askfr', +'it\.ask\.com','askit', +'nl\.ask\.com','asknl', +'uk\.ask\.com','askuk', +'(^|\.)ask\.co\.uk','askuk', '(^|\.)ask\.com','ask', 'atomz\.','atomz', 'overture\.com','overture', # Replace 'goto\.com','Goto.com', @@ -195,6 +306,42 @@ 'ixquick\.com','ixquick', 'search\.earthlink\.net','earthlink', 'i-une\.com','iune', +'blingo\.com','blingo', +'centraldatabase\.org','centraldatabase', +'clusty\.com','clusty', +'mysearch\.','mysearch', +'vivisimo\.com','vivisimo', +'kartoo\.com','kartoo', +'icerocket\.com','icerocket', +'sphere\.com','sphere', +# Chello Portals +'chello\.at','chelloat', +'chello\.be','chellobe', +'chello\.cz','chellocz', +'chello\.fr','chellofr', +'chello\.hu','chellohu', +'chello\.nl','chellonl', +'chello\.no','chellono', +'chello\.pl','chellopl', +'chello\.se','chellose', +'chello\.sk','chellosk', +'chello','chellocom', +# Mirago +'mirago\.be','miragobe', +'mirago\.ch','miragoch', +'mirago\.de','miragode', +'mirago\.dk','miragodk', +'es\.mirago\.com','miragoes', +'mirago\.fr','miragofr', +'mirago\.it','miragoit', +'mirago\.nl','miragonl', 
+'no\.mirago\.com','miragono', +'mirago\.se','miragose', +'mirago\.co\.uk','miragocouk', +'mirago','mirago', # required as catchall for new countries not yet known +'answerbus\.com','answerbus', +# Minor Australian search engines +'anzwers\.com\.au','anzwers', # Minor brazilian search engines 'engine\.exe','engine', 'miner\.bol\.com\.br','miner', @@ -202,6 +349,7 @@ 'baidu\.com','baidu', 'search\.sina\.com','sina', 'search\.sohu\.com','sohu', +'sogou\.com','sogou', # Minor czech search engines 'atlas\.cz','atlas', 'seznam\.cz','seznam', @@ -226,11 +374,9 @@ 'ilse\.','ilse', 'vindex\.','vindex', # Minor english search engines -'(^|\.)ask\.co\.uk','askuk', 'bbc\.co\.uk/cgi-bin/search','bbc', 'ifind\.freeserve','freeserve', 'looksmart\.co\.uk','looksmartuk', -'mirago\.','mirago', 'splut\.','splut', 'spotjockey\.','spotjockey', 'ukdirectory\.','ukdirectory', @@ -248,6 +394,7 @@ 'search[\w\-]+\.free\.fr','free', 'recherche\.club-internet\.fr','clubinternet', 'toile\.com','toile', +'biglotron\.com', 'biglotron', # Minor german search engines 'sucheaol\.aol\.de','aolde', 'fireball\.de','fireball', @@ -260,6 +407,9 @@ 'meinestadt\.de','meinestadt', '212\.227\.33\.241','metaspinner', '(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de', +'wwweasel\.de','wwweasel', +'netluchs\.de','netluchs', +'schoenerbrausen\.de','schoenerbrausen', # Minor hungarian search engines 'heureka\.hu','heureka', 'vizsla\.origo\.hu','origo', @@ -269,13 +419,18 @@ 'wahoo\.hu','wahoo', 'webmania\.hu','webmania', 'search\.internetto\.hu','internetto', +# Minor Indian search engines +'sify\.com','sify', # Minor italian search engines 'virgilio\.it','virgilio', 'arianna\.libero\.it','arianna', 'supereva\.com','supereva', 'kataweb\.it','kataweb', -# Minor norvegian search engines -'sok\.start\.no','start', +'search\.alice\.it\.master','aliceitmaster', +# Minor Japanese 
search engines +'ask\.jp','askjp', +# Minor Norwegian search engines +'sok\.start\.no','start', 'eniro\.no','eniro', # Minor polish search engines 'szukaj\.wp\.pl','wp', # Minor russian search engines @@ -314,21 +469,29 @@ 'google_base','(p|q|as_p|as_q)=', 'google_froogle','(p|q|as_p|as_q)=', 'google_image','(p|q|as_p|as_q)=', +'google_cache','(p|q|as_p|as_q)=cache:*.(?=\+)', 'google','(p|q|as_p|as_q)=', 'lycos','query=', 'msn','q=', +'live','q=', 'netscape','search=', 'tiscali','key=', 'aol','query=', 'terra','query=', -'voila','kw=', +'voila','(kw|rdata)=', 'search.com','q=', +'yahoo_mindset','p=', 'yahoo','p=', 'sympatico', 'query=', 'excite','search=', # Minor international search engines 'go','qt=', -'ask','ask=', +'askde','(ask|q)=', # break out Ask country specific engines. +'askes','(ask|q)=', +'askfr','(ask|q)=', +'askit','(ask|q)=', +'asknl','(ask|q)=', +'ask','(ask|q)=', 'atomz','sp-q=', 'euroseek','query=', 'findarticles','key=', @@ -345,19 +508,52 @@ 'dogpile', 'q(|kw)=', 'spray','string=', 'teoma','q=', -'virgilio','qs=', -'arianna','query=', -'supereva','q=', -'kataweb','q=', 'webcrawler','searchText=', 'wisenut','query=', 'ixquick', 'query=', 'earthlink', 'q=', 'iune','(keywords|q)=', +'blingo','q=', +'centraldatabase','query=', +'clusty','query=', +'mysearch','searchfor=', +'vivisimo','query=', +# kartoo: No keywords passed in referring URL. 
+'kartoo','', # empty pattern: Kartoo passes no keywords in the referrer +'icerocket','q=', +'sphere','q=', +# Chello Portals +'chelloat','q1=', +'chellobe','q1=', +'chellocz','q1=', +'chellofr','q1=', +'chellohu','q1=', +'chellonl','q1=', +'chellono','q1=', +'chellopl','q1=', +'chellose','q1=', +'chellosk','q1=', +'chellocom','q1=', +# Mirago +'miragobe','(txtsearch|qry)=', +'miragoch','(txtsearch|qry)=', +'miragode','(txtsearch|qry)=', +'miragodk','(txtsearch|qry)=', +'miragoes','(txtsearch|qry)=', +'miragofr','(txtsearch|qry)=', +'miragoit','(txtsearch|qry)=', +'miragonl','(txtsearch|qry)=', +'miragono','(txtsearch|qry)=', +'miragose','(txtsearch|qry)=', +'miragocouk','(txtsearch|qry)=', +'mirago','(txtsearch|qry)=', +'answerbus','', # Does not provide query parameters +# Minor Australian search engines +'anzwers','search=', # Minor brazilian search engines 'engine','p1=', 'miner','q=', # Minor chinese search engines -'baidu','word=', 'sina', 'word=', 'sohu','word=', +'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=', # Minor czech search engines 'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=', # Minor danish search engines @@ -365,23 +561,37 @@ # Minor dutch search engines 'ilse','search_for=', 'vindex','in=', # Minor english search engines -'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', -'mirago','txtsearch=', 'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', +'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', +'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', # Minor finnish search engines 'haku','w=', # Minor french search engines 'francite','name=', 'clubinternet', 'q=', 'toile', 'q=', +'biglotron','question=', # Minor german search engines 'aolde','q=', 'fireball','q=', 'infoseek','qt=', 'webde','su=', 
'abacho','q=', 't-online','q=', 'metaspinner','qry=', 'metacrawler_de','qry=', +'wwweasel','q=', +'netluchs','query=', +'schoenerbrausen','q=', # Minor hungarian search engines 'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', -# Minor norvegian search engines -'start','q=', +# Minor Indian search engines +'sify','keyword=', +# Minor Italian search engines +'virgilio','qs=', +'arianna','query=', +'supereva','q=', +'kataweb','q=', +'aliceitmaster','qs=', +# Minor Japanese search engines +'askjp','(ask|q)=', +# Minor Norwegian search engines +'start','q=', 'eniro','q=', # Minor polish search engines 'wp','szukaj=', # Minor russian search engines @@ -407,7 +617,7 @@ @WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); # SearchEnginesKnownUTFCoding -# Known param that proves a search engines has coded its param in UTF8 +# Known parameter that proves a search engine has coded its parameters in UTF-8 #------------------------------------------------------------------------------ %SearchEnginesKnownUTFCoding=( # Most common search engines @@ -422,29 +632,37 @@ #------------------------------------------------------------------------------ %SearchEnginesHashLib=( # Major international search engines -'alexa','Alexa', -'alltheweb','AllTheWeb', -'altavista','AltaVista', -'a9', 'A9.com', -'dmoz','DMOZ', -'google_base','Google (Base)', -'google_froogle','Froogle (Google)', -'google_image','Google (Images)', -'google','Google', -'lycos','Lycos', -'msn','MSN', 
-'netscape','Netscape', -'aol','AOL', -'terra','Terra', -'tiscali','Tiscali', -'voila','Voila', -'search.com','Search.com', -'yahoo','Yahoo', -'sympatico', 'Sympatico', -'excite','Excite', +'alexa','Alexa', +'alltheweb','AllTheWeb', +'altavista','AltaVista', +'a9', 'A9', +'dmoz','DMOZ', +'google_base','Google (Base)', +'google_froogle','Froogle (Google)', +'google_image','Google (Images)', +'google_cache','Google (cache)', +'google','Google', +'lycos','Lycos', +'msn','MSN Search', +'live','Windows Live', +'netscape','Netscape', +'aol','AOL', +'terra','Terra', +'tiscali','Tiscali', +'voila','Voila', +'search.com','Search.com', +'yahoo_mindset','Yahoo! Mindset', +'yahoo','Yahoo!', +'sympatico','Sympatico', +'excite','Excite', # Minor international search engines 'go','Go.com', -'ask','Ask Jeeves', +'askde','Ask Deutschland', +'askes','Ask España', # break out Ask country specific engines. +'askfr','Ask France', +'askit','Ask Italia', +'asknl','Ask Nederland', +'ask','Ask', 'atomz','Atomz', 'dejanews','DejaNews', 'euroseek','Euroseek', @@ -459,18 +677,51 @@ 'nbci','NBCI', 'northernlight','NorthernLight', 'overture','Overture', # Replace 'goto\.com','Goto.com', -'dogpile','Dogpile', +'dogpile','Dogpile', 'spray','Spray', -'teoma','Teoma', # Replace 'directhit\.com','DirectHit', -'webcrawler','WebCrawler', +'teoma','Teoma', # Replace 'directhit\.com','DirectHit', +'webcrawler','WebCrawler', 'wisenut','WISENut', -'ixquick', 'ix quick', +'ixquick','ix quick', 'earthlink', 'Earth Link', -'iune','i-une.com', +'iune','i-une', +'blingo','Blingo', +'centraldatabase','GPU p2p search', +'clusty','Clusty', +'mysearch','My Search', +'vivisimo','Vivisimo', +'kartoo','Kartoo', +'icerocket','Icerocket (Blog)', +'sphere','Sphere (Blog)', +# Chello Portals +'chelloat','Chello Austria', +'chellobe','Chello Belgium', +'chellocz','Chello Czech Republic', +'chellofr','Chello France', +'chellohu','Chello Hungary', +'chellonl','Chello Netherlands', +'chellono','Chello Norway', 
+'chellopl','Chello Poland', +'chellose','Chello Sweden', +'chellosk','Chello Slovakia', +'chellocom','Chello (Country not recognized)', +# Mirago +'miragobe','Mirago Belgium', +'miragoch','Mirago Switzerland', +'miragode','Mirago Germany', +'miragodk','Mirago Denmark', +'miragoes','Mirago Spain', +'miragofr','Mirago France', +'miragoit','Mirago Italy', +'miragonl','Mirago Netherlands', +'miragono','Mirago Norway', +'miragose','Mirago Sweden', +'miragocouk','Mirago UK', +'mirago','Mirago (country unknown)', # Minor brazilian search engines 'engine','Cade', 'miner','Meta Miner', # Minor chinese search engines -'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', +'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', 'sogou','Sogou', # Minor czech search engines 'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz', # Minor danish search-engines @@ -478,26 +729,39 @@ # Minor dutch search engines 'ilse','Ilse','vindex','Vindex\.nl', # Minor english search engines -'askuk','Ask Jeeves UK', 'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK', -'mirago','Mirago', 'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', +'askuk','Ask UK', +'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK', +'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', # Minor finnish search engines 'haku','Ihmemaa', # Minor french search engines 'aolfr','AOL (fr)', 'ctrouve','C\'est trouvé', 'francite','Francité', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet', 'toile', 'Toile du Québec', -# Minor german search engines +'biglotron','Biglotron', +# Minor German search engines 'aolde','AOL (de)', 'fireball','Fireball', 'infoseek','Infoseek', 'webde','Web.de', 
'abacho','Abacho', 't-online','T-Online', 'allesklar','allesklar.de', 'meinestadt','meinestadt.de', 'metaspinner','metaspinner', 'metacrawler_de','metacrawler.de', +'wwweasel','WWWeasel', +'netluchs','Netluchs', +'schoenerbrausen','Schoenerbrausen', # Minor hungarian search engines 'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkeresõ', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Keresõ', +# Minor Indian search engines +'sify','Sify', # Minor italian search engines -'virgilio','Virgilio', 'arianna','Arianna', 'supereva','Supereva', 'kataweb','Kataweb', -# Minor norvegian search engines -'start','start.no', +'virgilio','Virgilio', +'arianna','Arianna', +'supereva','Supereva', +'kataweb','Kataweb', +'aliceitmaster','search.alice.it.master', +# Minor Japanese search engines +'askjp','Ask Japan', +# Minor Norwegian search engines +'start','start.no', 'eniro','Eniro', # Minor polish search engines 'wp','Szukaj', # Minor russian search engines