From: eldy <> Date: Sun, 1 Oct 2006 14:37:54 +0000 (+0000) Subject: Update search_engines file. X-Git-Tag: AWSTATS_6_6_BETA~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=38466daee1836c736fdfedfbb99c2b58d6d32aee;p=thirdparty%2FAWStats.git Update search_engines file. --- diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index 13b77794..6f8e00b3 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -1,810 +1,1080 @@ -# AWSTATS SEARCH ENGINES DATABASE -#------------------------------------------------------------------------------ -# If you want to add a Search Engine to extend AWStats database detection capabilities, -# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in -# SearchEnginesHashLib. -# An entry if known in SearchEnginesKnownUrl is also welcome. -#------------------------------------------------------------------------------ -# $Revision$ - $Author$ - $Date$ - -# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html -# added minor italian search engines -# arianna http://arianna.libero.it/ -# supereva http://search.supereva.com/ -# kataweb http://kataweb.it/ -# corrected uk looksmart -# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', -# to -# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', -# corrected spelling -# internationnal -> international -# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to -# avoid counting gmail referrals as search engine traffic -# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html -# avoid counting babelfish.altavista referrals as search engine traffic -# avoid counting translate.google referrals as search engine traffic -# 2005-11-20 Sean Carlos -# added missing 'tiscali','key=', entry. Check order -# 2005-11-22 Sean Carlos -# added Google Base & Froogle. Froogle not tested. 
-# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html -# added biglotron.com (France) -# added blingo http://www.blingo.com/ -# added Clusty & Vivisimo -# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] -# added GPU p2p search http://search.centraldatabase.org/ -# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] -# added Ask group's "mysearch" -# added sify.com (India) -# added sogou.com (Cina) [https://sourceforge.net/forum/message.php?msg_id=3501603] -# Ask changes: -# - added Ask Japan (ask.jp) -# - break out Ask new country level variants (DE, ES, FR, IT, NL) -# - updated Ask name from Ask Jevees -# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] -# - updated Ask uk (new uk.ask.com added to older ask.co.uk) -# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] -# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch -# to AWStats to allow untranslated html. Otherwise html will appear instead of link. -# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engined mentioned no longer -# exists https://sourceforge.net/forum/message.php?msg_id=3025426 -# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html -# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) -# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 -# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 -# To do: add more extensive IP list; keywords not yet detected. -# added icerocket.com blog search http://www.icerocket.com/ -# added live.com (msn) http://www.live.com/ -# added Meta motor kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the -# search engine list but the actual search words are not available. -# added netluchs.de http://www.netluchs.de/ -# added sphere.com blog search http://www.sphere.com/ -# added wwweasel.de http://wwweasel.de -# added Yahoo Mindset! http://mindset.research.yahoo.com/ -# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) -# 2006-05-13 -# added Google cache IPs 64.233.183.104 & 66.102.7.104 -# 2006-05-20 -# anzwers.com.au -# schoenerbrausen.de http://www.schoenerbrausen.de/ -# 216.239.59.104 -# answerbus http://www.answerbus.com/ - -#package AWSSE; - - -# SearchEnginesSearchIDOrder -# It contains all matching criteria to search for in log fields. This list is -# used to know in which order to search Search Engines IDs. -# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more -# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more -# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_' -#------------------------------------------------------------------------------ -@SearchEnginesSearchIDOrder_list1=( -# Major international search engines -'base\.google\.', -'froogle\.google\.', -'images\.google\.', -'google\.','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100|59\.104)', -'64\.233\.183\.104', -'66\.102\.7\.104', -'66\.249\.93\.104', -'72\.14\.(203\.104|207\.104)', -'msn\.', -'live\.com', -'voila\.', -'mindset\.research\.yahoo', -'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135)', -'search\.aol\.co', -'tiscali\.', -'lycos\.', -'alexa\.com', -'alltheweb\.com', -'altavista\.', -'a9\.com', -'dmoz\.org', -'netscape\.', -'search\.terra\.', -'www\.search\.com', -'search\.sli\.sympatico\.ca', -'excite\.' 
-); - -@SearchEnginesSearchIDOrder_list2=( -# Minor international search engines -'northernlight\.', -'hotbot\.', -'kvasir\.', -'webcrawler\.', -'metacrawler\.', -'go2net\.com', -'(^|\.)go\.com', -'euroseek\.', -'looksmart\.', -'spray\.', -'nbci\.com\/search', -'de\.ask.\com', # break out Ask country specific engines. (.jp is in Japan section) -'es\.ask.\com', -'fr\.ask.\com', -'it\.ask.\com', -'nl\.ask.\com', -'uk\.ask.\com', -'(^|\.)ask\.com', -'atomz\.', -'overture\.com', # Replace 'goto\.com','Goto.com', -'teoma\.', -'findarticles\.com', -'infospace\.com', -'mamma\.', -'dejanews\.', -'dogpile\.com', -'wisenut\.com', -'ixquick\.com', -'search\.earthlink\.net', -'i-une\.com', -'blingo\.com', -'centraldatabase\.org', -'clusty\.com', -'mysearch\.', -'vivisimo\.com', -'kartoo\.com', -'icerocket\.com', -'sphere\.com', -# Chello Portals -'chello\.at', -'chello\.be', -'chello\.cz', -'chello\.fr', -'chello\.hu', -'chello\.nl', -'chello\.no', -'chello\.pl', -'chello\.se', -'chello\.sk', -'chello', # required as catchall for new countries not yet known -# Mirago -'mirago\.be', -'mirago\.ch', -'mirago\.de', -'mirago\.dk', -'es\.mirago\.com', -'mirago\.fr', -'mirago\.it', -'mirago\.nl', -'no\.mirago\.com', -'mirago\.se', -'mirago\.co\.uk', -'mirago', # required as catchall for new countries not yet known -'answerbus\.com', -# Minor Australian search engines -'anzwers\.com\.au', -# Minor brazilian search engines -'engine\.exe', 'miner\.bol\.com\.br', -# Minor chinese search engines -'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com', -# Minor czech search engines -'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz', -# Minor danish search-engines -'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk', -# Minor dutch search engines -'ilse\.','vindex\.', -# Minor english search engines 
-'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', -# Minor finnish search engines -'haku\.www\.fi', -# Minor french search engines -'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', -'toile\.com', 'biglotron\.com', -# Minor german search engines -'sucheaol\.aol\.de', -'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de', -'suchen\.abacho\.de','brisbane\.t-online\.de','allesklar\.de','meinestadt\.de', -'212\.227\.33\.241', -'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', -'wwweasel\.de', -'netluchs\.de', -'schoenerbrausen\.de', -# Minor hungarian search engines -'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', -# Minor Indian search engines -'sify\.com', -# Minor italian search engines -'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master', -# Minor Japanese search engines -'ask\.jp', -# Minor Norwegian search engines -'sok\.start\.no', 'eniro\.no', -# Minor polish search engines -'szukaj\.wp\.pl', -# Minor russian search engines -'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', -# Minor swedish search engines -'evreka\.passagen\.se', -# Minor swiss search engines -'search\.ch', 'search\.bluewin\.ch' -); -@SearchEnginesSearchIDOrder_listgen=( -# Generic search engines -'search\..*\.\w+' -); - - -# NotSearchEnginesKeys -# If a search engine key is found, we check its exclude list to know if it's -# really a search engine -#------------------------------------------------------------------------------ -%NotSearchEnginesKeys=( 
-'altavista\.'=>'babelfish\.altavista\.', -'google\.'=>'mail\.google\.', -'google\.'=>'translate\.google\.', -'msn\.'=>'hotmail\.msn\.', -'tiscali\.'=>'mail\.tiscali\.', -'yahoo\.'=>'mail\.yahoo\.', -'yahoo\.'=>'groups\.yahoo\.' -); - - -# SearchEnginesHashID -# Each Search Engine Search ID is associated to an AWStats id string -#------------------------------------------------------------------------------ -%SearchEnginesHashID = ( -# Major international search engines -'base\.google\.','google_base', -'froogle\.google\.','google_froogle', -'images\.google\.','google_image', -'google\.','google','216\.239\.(35\.101|37\.101|39\.100|39\.101|51\.100|51\.101|35\.100|59\.104)','google', -'64\.233\.183\.104','google_cache', -'66\.102\.7\.104','google_cache', -'66\.249\.93\.104','google_cache', -'72\.14\.(203\.104|207\.104)','google_cache', -'msn\.','msn', -'live\.com','live', -'voila\.','voila', -'mindset\.research\.yahoo','yahoo_mindset', -'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135)','yahoo', -'lycos\.','lycos', -'alexa\.com','alexa', -'alltheweb\.com','alltheweb', -'altavista\.','altavista', -'a9\.com','a9', -'dmoz\.org','dmoz', -'netscape\.','netscape', -'search\.terra\.','terra', -'www\.search\.com','search.com', -'tiscali\.','tiscali', -'search\.aol\.co','aol', -'search\.sli\.sympatico\.ca','sympatico', -'excite\.','excite', -# Minor international search engines -'northernlight\.','northernlight', -'hotbot\.','hotbot', -'kvasir\.','kvasir', -'webcrawler\.','webcrawler', -'metacrawler\.','metacrawler', -'go2net\.com','go2net', -'(^|\.)go\.com','go', -'euroseek\.','euroseek', -'looksmart\.','looksmart', -'spray\.','spray', -'nbci\.com\/search','nbci', -'de\.ask.\com','askde', # break out Ask country specific engines. 
-'es\.ask.\com','askes', -'fr\.ask.\com','askfr', -'it\.ask.\com','askit', -'nl\.ask.\com','asknl', -'uk\.ask.\com','askuk', -'(^|\.)ask\.co\.uk','askuk', -'(^|\.)ask\.com','ask', -'atomz\.','atomz', -'overture\.com','overture', # Replace 'goto\.com','Goto.com', -'teoma\.','teoma', -'findarticles\.com','findarticles', -'infospace\.com','infospace', -'mamma\.','mamma', -'dejanews\.','dejanews', -'dogpile\.com','dogpile', -'wisenut\.com','wisenut', -'ixquick\.com','ixquick', -'search\.earthlink\.net','earthlink', -'i-une\.com','iune', -'blingo\.com','blingo', -'centraldatabase\.org','centraldatabase', -'clusty\.com','clusty', -'mysearch\.','mysearch', -'vivisimo\.com','vivisimo', -'kartoo\.com','kartoo', -'icerocket\.com','icerocket', -'sphere\.com','sphere', -# Chello Portals -'chello\.at','chelloat', -'chello\.be','chellobe', -'chello\.cz','chellocz', -'chello\.fr','chellofr', -'chello\.hu','chellohu', -'chello\.nl','chellonl', -'chello\.no','chellono', -'chello\.pl','chellopl', -'chello\.se','chellose', -'chello\.sk','chellosk', -'chello','chellocom', -# Mirago -'mirago\.be','miragobe', -'mirago\.ch','miragoch', -'mirago\.de','miragode', -'mirago\.dk','miragodk', -'es\.mirago\.com','miragoes', -'mirago\.fr','miragofr', -'mirago\.it','miragoit', -'mirago\.nl','miragonl', -'no\.mirago\.com','miragono', -'mirago\.se','miragose', -'mirago\.co\.uk','miragocouk', -'mirago','mirago', # required as catchall for new countries not yet known -'answerbus\.com','answerbus', -# Minor Australian search engines -'anzwers\.com\.au','anzwers', -# Minor brazilian search engines -'engine\.exe','engine', -'miner\.bol\.com\.br','miner', -# Minor chinese search engines -'baidu\.com','baidu', -'search\.sina\.com','sina', -'search\.sohu\.com','sohu', -'sogou\.com','sogou', -# Minor czech search engines -'atlas\.cz','atlas', -'seznam\.cz','seznam', -'quick\.cz','quick', -'centrum\.cz','centrum', -'jyxo\.(cz|com)','jyxo', -'najdi\.to','najdi', -'redbox\.cz','redbox', -# Minor danish 
search-engines -'opasia\.dk','opasia', -'danielsen\.com','danielsen', -'sol\.dk','sol', -'jubii\.dk','jubii', -'find\.dk','finddk', -'edderkoppen\.dk','edderkoppen', -'netstjernen\.dk','netstjernen', -'orbis\.dk','orbis', -'tyfon\.dk','tyfon', -'1klik\.dk','1klik', -'ofir\.dk','ofir', -# Minor dutch search engines -'ilse\.','ilse', -'vindex\.','vindex', -# Minor english search engines -'bbc\.co\.uk/cgi-bin/search','bbc', -'ifind\.freeserve','freeserve', -'looksmart\.co\.uk','looksmartuk', -'splut\.','splut', -'spotjockey\.','spotjockey', -'ukdirectory\.','ukdirectory', -'ukindex\.co\.uk','ukindex', -'ukplus\.','ukplus', -'searchy\.co\.uk','searchy', -# Minor finnish search engines -'haku\.www\.fi','haku', -# Minor french search engines -'recherche\.aol\.fr','aolfr', -'ctrouve\.','ctrouve', -'francite\.','francite', -'\.lbb\.org','lbb', -'rechercher\.libertysurf\.fr','libertysurf', -'search[\w\-]+\.free\.fr','free', -'recherche\.club-internet\.fr','clubinternet', -'toile\.com','toile', -'biglotron\.com', 'biglotron', -# Minor german search engines -'sucheaol\.aol\.de','aolde', -'fireball\.de','fireball', -'infoseek\.de','infoseek', -'suche\d?\.web\.de','webde', -'[a-z]serv\.rrzn\.uni-hannover\.de','meta', -'suchen\.abacho\.de','abacho', -'brisbane\.t-online\.de','t-online', -'allesklar\.de','allesklar', -'meinestadt\.de','meinestadt', -'212\.227\.33\.241','metaspinner', -'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de', -'wwweasel\.de','wwweasel', -'netluchs\.de','netluchs', -'schoenerbrausen\.de','schoenerbrausen', -# Minor hungarian search engines -'heureka\.hu','heureka', -'vizsla\.origo\.hu','origo', -'lapkereso\.hu','lapkereso', -'goliat\.hu','goliat', -'index\.hu','indexhu', -'wahoo\.hu','wahoo', -'webmania\.hu','webmania', -'search\.internetto\.hu','internetto', -# Minor Indian search engines -'sify\.com','sify', -# Minor 
italian search engines -'virgilio\.it','virgilio', -'arianna\.libero\.it','arianna', -'supereva\.com','supereva', -'kataweb\.it','kataweb', -'search\.alice\.it\.master','aliceitmaster', -# Minor Japanese search engines -'ask\.jp','askjp', -# Minor Norwegian search engines -'sok\.start\.no','start', 'eniro\.no','eniro', -# Minor polish search engines -'szukaj\.wp\.pl','wp', -# Minor russian search engines -'ya(ndex)?\.ru','yandex', -'aport\.ru','aport', -'rambler\.ru','rambler', -'turtle\.ru','turtle', -'metabot\.ru','metabot', -# Minor swedish search engines -'evreka\.passagen\.se','passagen', -# Minor swiss search engines -'search\.ch','searchch', -'search\.bluewin\.ch','bluewin', -# Generic search engines -'search\..*\.\w+','search' -); - - -# SearchEnginesWithKeysNotInQuery -# List of search engines that store keyword as page instead of query parameter -#------------------------------------------------------------------------------ -%SearchEnginesWithKeysNotInQuery=( -'a9',1 # www.a9.com/searckey1%20searchkey2 -); - -# SearchEnginesKnownUrl -# Known rules to extract keywords from a referrer search engine URL -#------------------------------------------------------------------------------ -%SearchEnginesKnownUrl=( -# Most common search engines -'alexa','q=', -'alltheweb','q(|uery)=', -'altavista','q=', -'a9','a9\.com\/', -'dmoz','search=', -'google_base','(p|q|as_p|as_q)=', -'google_froogle','(p|q|as_p|as_q)=', -'google_image','(p|q|as_p|as_q)=', -'google_cache','(p|q|as_p|as_q)=cache:*.(?=\+)', -'google','(p|q|as_p|as_q)=', -'lycos','query=', -'msn','q=', -'live','q=', -'netscape','search=', -'tiscali','key=', -'aol','query=', -'terra','query=', -'voila','(kw|rdata)=', -'search.com','q=', -'yahoo_mindset','p=', -'yahoo','p=', -'sympatico', 'query=', -'excite','search=', -# Minor international search engines -'go','qt=', -'askde','(ask|q)=', # break out Ask country specific engines. 
-'askes','(ask|q)=', -'askfr','(ask|q)=', -'askit','(ask|q)=', -'asknl','(ask|q)=', -'ask','(ask|q)=', -'atomz','sp-q=', -'euroseek','query=', -'findarticles','key=', -'go2net','general=', -'hotbot','mt=', -'infospace','qkw=', -'kvasir', 'q=', -'looksmart','key=', -'mamma','query=', -'metacrawler','general=', -'nbci','keyword=', -'northernlight','qr=', -'overture','keywords=', -'dogpile', 'q(|kw)=', -'spray','string=', -'teoma','q=', -'webcrawler','searchText=', -'wisenut','query=', -'ixquick', 'query=', -'earthlink', 'q=', -'iune','(keywords|q)=', -'blingo','q=', -'centraldatabase','query=', -'clusty','query=', -'mysearch','searchfor=', -'vivisimo','query=', -# kartoo: No keywords passed in referring URL. -'kartoo',, -'icerocket','q=', -'sphere','q=', -# Chello Portals -'chelloat','q1=', -'chellobe','q1=', -'chellocz','q1=', -'chellofr','q1=', -'chellohu','q1=', -'chellonl','q1=', -'chellono','q1=', -'chellopl','q1=', -'chellose','q1=', -'chellosk','q1=', -'chellocom','q1=', -# Mirago -'miragobe','(txtsearch|qry)=', -'miragoch','(txtsearch|qry)=', -'miragode','(txtsearch|qry)=', -'miragodk','(txtsearch|qry)=', -'miragoes','(txtsearch|qry)=', -'miragofr','(txtsearch|qry)=', -'miragoit','(txtsearch|qry)=', -'miragonl','(txtsearch|qry)=', -'miragono','(txtsearch|qry)=', -'miragose','(txtsearch|qry)=', -'miragocouk','(txtsearch|qry)=', -'mirago','(txtsearch|qry)=', -'answerbus','', # Does not provide query parameters -# Minor Australian search engines -'anzwers','search=', -# Minor brazilian search engines -'engine','p1=', 'miner','q=', -# Minor chinese search engines -'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=', -# Minor czech search engines -'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=', -# Minor danish search engines -'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 
'ofir','querytext=', -# Minor dutch search engines -'ilse','search_for=', 'vindex','in=', -# Minor english search engines -'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', -'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', -# Minor finnish search engines -'haku','w=', -# Minor french search engines -'francite','name=', 'clubinternet', 'q=', -'toile', 'q=', -'biglotron','question=', -# Minor german search engines -'aolde','q=', -'fireball','q=', 'infoseek','qt=', 'webde','su=', -'abacho','q=', 't-online','q=', -'metaspinner','qry=', -'metacrawler_de','qry=', -'wwweasel','q=', -'netluchs','query=', -'schoenerbrausen','q=', -# Minor hungarian search engines -'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', -# Minor Indian search engines -'sify','keyword=', -# Minor Italian search engines -'virgilio','qs=', -'arianna','query=', -'supereva','q=', -'kataweb','q=', -'aliceitmaster','qs=', -# Minor Japanese search engines -'askjp','(ask|q)=', -# Minor Norwegian search engines -'start','q=', 'eniro','q=', -# Minor polish search engines -'wp','szukaj=', -# Minor russian search engines -'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=', -# Minor swedish search engines -'passagen','q=', -# Minor swiss search engines -'searchch', 'q=', 'bluewin', 'qry=' -); - -# SearchEnginesKnownUrlNotFound -# Known rules to extract not found keywords from a referrer search engine URL -#------------------------------------------------------------------------------ -%SearchEnginesKnownUrlNotFound=( -# Most common search engines -'msn','origq=' -); - -# If no rules are known, WordsToExtractSearchUrl will be used to search keyword parameter -# If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters. 
-#------------------------------------------------------------------------------ -@WordsToExtractSearchUrl= ('ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w='); -@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); - -# SearchEnginesKnownUTFCoding -# Known parameter that proves a search engine has coded its parameters in UTF-8 -#------------------------------------------------------------------------------ -%SearchEnginesKnownUTFCoding=( -# Most common search engines -'google','ie=utf-8', -'alltheweb','cs=utf-8' -); - - -# SearchEnginesHashLib -# List of search engines names -# 'search_engine_id', 'search_engine_name', -#------------------------------------------------------------------------------ -%SearchEnginesHashLib=( -# Major international search engines -'alexa','Alexa', -'alltheweb','AllTheWeb', -'altavista','AltaVista', -'a9', 'A9', -'dmoz','DMOZ', -'google_base','Google (Base)', -'google_froogle','Froogle (Google)', -'google_image','Google (Images)', -'google_cache','Google (cache)', -'google','Google', -'lycos','Lycos', -'msn','MSN Search', -'live','Windows Live', -'netscape','Netscape', -'aol','AOL', -'terra','Terra', -'tiscali','Tiscali', -'voila','Voila', -'search.com','Search.com', -'yahoo_mindset','Yahoo! 
Mindset', -'yahoo','Yahoo!', -'sympatico','Sympatico', -'excite','Excite', -# Minor international search engines -'go','Go.com', -'askde','Ask Deutschland', -'askes','Ask España', # break out Ask country specific engines. -'askfr','Ask France', -'askit','Ask Italia', -'asknl','Ask Nederland', -'ask','Ask', -'atomz','Atomz', -'dejanews','DejaNews', -'euroseek','Euroseek', -'findarticles','Find Articles', -'go2net','Go2Net (Metamoteur)', -'hotbot','Hotbot', -'infospace','InfoSpace', -'kvasir','Kvasir', -'looksmart','Looksmart', -'mamma','Mamma', -'metacrawler','MetaCrawler (Metamoteur)', -'nbci','NBCI', -'northernlight','NorthernLight', -'overture','Overture', # Replace 'goto\.com','Goto.com', -'dogpile','Dogpile', -'spray','Spray', -'teoma','Teoma', # Replace 'directhit\.com','DirectHit', -'webcrawler','WebCrawler', -'wisenut','WISENut', -'ixquick','ix quick', -'earthlink', 'Earth Link', -'iune','i-une', -'blingo','Blingo', -'centraldatabase','GPU p2p search', -'clusty','Clusty', -'mysearch','My Search', -'vivisimo','Vivisimo', -'kartoo','Kartoo', -'icerocket','Icerocket (Blog)', -'sphere','Sphere (Blog)', -# Chello Portals -'chelloat','Chello Austria', -'chellobe','Chello Belgium', -'chellocz','Chello Czech Republic', -'chellofr','Chello France', -'chellohu','Chello Hungary', -'chellonl','Chello Netherlands', -'chellono','Chello Norway', -'chellopl','Chello Poland', -'chellose','Chello Sweden', -'chellosk','Chello Slovakia', -'chellocom','Chello (Country not recognized)', -# Mirago -'miragobe','Mirago Belgium', -'miragoch','Mirago Switzerland', -'miragode','Mirago Germany', -'miragodk','Mirago Denmark', -'miragoes','Mirago Spain', -'miragofr','Mirago France', -'miragoit','Mirago Italy', -'miragonl','Mirago Netherlands', -'miragono','Mirago Norway', -'miragose','Mirago Sweden', -'miragocouk','Mirago UK', -'mirago','Mirago (country unknown)', -# Minor brazilian search engines -'engine','Cade', 'miner','Meta Miner', -# Minor chinese search engines -'baidu','Baidu', 
'sina','Sina', 'sohu','Sohu', 'sogou','Sogou', -# Minor czech search engines -'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz', -# Minor danish search-engines -'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir', -# Minor dutch search engines -'ilse','Ilse','vindex','Vindex\.nl', -# Minor english search engines -'askuk','Ask UK', -'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK', -'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', -# Minor finnish search engines -'haku','Ihmemaa', -# Minor french search engines -'aolfr','AOL (fr)', 'ctrouve','C\'est trouvé', 'francite','Francité', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet', -'toile', 'Toile du Québec', -'biglotron','Biglotron', -# Minor German search engines -'aolde','AOL (de)', -'fireball','Fireball', 'infoseek','Infoseek', 'webde','Web.de', -'abacho','Abacho', 't-online','T-Online', -'allesklar','allesklar.de', 'meinestadt','meinestadt.de', -'metaspinner','metaspinner', -'metacrawler_de','metacrawler.de', -'wwweasel','WWWeasel', -'netluchs','Netluchs', -'schoenerbrausen','Schoenerbrausen/', -# Minor hungarian search engines -'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkeresõ', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Keresõ', -# Minor Indian search engines -'sify','Sify', -# Minor italian search engines -'virgilio','Virgilio', -'arianna','Arianna', -'supereva','Supereva', -'kataweb','Kataweb', -'aliceitmaster','search.alice.it.master', -# Minor Japanese search engines -'askjp','Ask Japan', -# Minor Norwegian 
search engines -'start','start.no', 'eniro','Eniro', -# Minor polish search engines -'wp','Szukaj', -# Minor russian search engines -'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot', -# Minor swedish search engines -'passagen','Evreka', -# Minor Swiss search engines -'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch', -# Generic search engines -'search','Unknown search engines' -); - - -# Sanity check. -# Enable this code and run perl search_engines.pm to check file entries are ok -#----------------------------------------------------------------------------- -#foreach my $key (@SearchEnginesSearchIDOrder_list1) { -# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); -# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } } -# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } } -#} } -#foreach my $key (@SearchEnginesSearchIDOrder_list2) { -# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); -# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } } -# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } } -#} } -#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } } -#foreach my $key (keys %NotSearchEnginesKeys) { if (! 
$SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } } -#foreach my $key (keys %SearchEnginesKnownUrl) { -# my $found=0; -# foreach my $key2 (values %SearchEnginesHashID) { -# if ($key eq $key2) { $found=1; last; } -# } -# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; } -#} -#foreach my $key (keys %SearchEnginesHashLib) { -# my $found=0; -# foreach my $key2 (values %SearchEnginesHashID) { -# if ($key eq $key2) { $found=1; last; } -# } -# if (! $found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; } -#} -#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen; - -1; +# AWSTATS SEARCH ENGINES DATABASE +#------------------------------------------------------------------------------ +# If you want to add a Search Engine to extend AWStats database detection capabilities, +# you must add an entry in SearchEnginesSearchIDOrder, SearchEnginesHashID and in +# SearchEnginesHashLib. +# An entry if known in SearchEnginesKnownUrl is also welcome. 
+#------------------------------------------------------------------------------ +# $Revision$ - $Author$ - $Date$ + +# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html +# added minor italian search engines +# arianna http://arianna.libero.it/ +# supereva http://search.supereva.com/ +# kataweb http://kataweb.it/ +# corrected uk looksmart +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmart','key=', +# to +# 'askuk','ask=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', +# corrected spelling +# internationnal -> international +# added 'google\.'=>'mail\.google\.', to NotSearchEnginesKeys in order to +# avoid counting gmail referrals as search engine traffic +# 2005-08-21 Sean Carlos http://www.antezeta.com/awstats.html +# avoid counting babelfish.altavista referrals as search engine traffic +# avoid counting translate.google referrals as search engine traffic +# 2005-11-20 Sean Carlos +# added missing 'tiscali','key=', entry. Check order +# 2005-11-22 Sean Carlos +# added Google Base & Froogle. Froogle not tested. 
+# 2006-04-18 Sean Carlos http://www.antezeta.com/awstats.html +# added biglotron.com (France) +# added blingo http://www.blingo.com/ +# added Clusty & Vivisimo +# added eniro.no (Norway) [https://sourceforge.net/forum/message.php?msg_id=3134783] +# added GPU p2p search http://search.centraldatabase.org/ +# added mail.tiscali to "not search engines list" [https://sourceforge.net/forum/message.php?msg_id=3166688] +# added Ask group's "mysearch" +# added sify.com (India) +# added sogou.com (China) [https://sourceforge.net/forum/message.php?msg_id=3501603] +# Ask changes: +# - added Ask Japan (ask.jp) +# - break out Ask new country level variants (DE, ES, FR, IT, NL) +# - updated Ask name from Ask Jeeves +# - added Ask q= parameter - many recent searches probably not recognized; [https://sourceforge.net/forum/message.php?msg_id=3465444] +# - updated Ask uk (new uk.ask.com added to older ask.co.uk) +# updated voila kw|rdata parameter [https://sourceforge.net/forum/message.php?msg_id=3373912] +# for each new engine, added link to Search Engine. This serves to document engine. Done for major & Italian engines as well. Requires patch +# to AWStats to allow untranslated html. Otherwise html will appear instead of link. +# reviewed mnoGoSearch (http://www.mnogosearch.org/); the search engine mentioned no longer +# exists https://sourceforge.net/forum/message.php?msg_id=3025426 +# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html +# added 10 Chello European broadband portals (Austria, Belgium, Czech Republic, France, Hungary, The Netherlands, Norway, Poland, Slovakia, Sweden) +# added Alice Internal Search (blends data with Google?) search.alice.it.master:10005 +# added detection of google cache views from IPs 66.249.93.104 72.14.203.104 72.14.207.104 +# To do: add more extensive IP list; keywords not yet detected. +# added icerocket.com blog search http://www.icerocket.com/ +# added live.com (msn) http://www.live.com/ +# added Meta motor kartoo. 
Note: Kartoo does not provide search words in referrers, thus the engine will appear in the +# search engine list but the actual search words are not available. +# added netluchs.de http://www.netluchs.de/ +# added sphere.com blog search http://www.sphere.com/ +# added wwweasel.de http://wwweasel.de +# added Yahoo Mindset! http://mindset.research.yahoo.com/ +# updated Mirago query parameter recognition (qry=); added breakout for each country (France, Germany, Spain, Italy, Norway, Sweden, Denmark, Netherlands, Belgium, Switzerland) +# 2006-05-13 Sean Carlos http://www.antezeta.com/awstats.html +# added Google cache IPs 64.233.183.104 & 66.102.7.104 +# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html +# anzwers.com.au +# schoenerbrausen.de http://www.schoenerbrausen.de/ +# added Google cache IP 216.239.59.104 +# answerbus http://www.answerbus.com/ (does not provide keywords) +# 2006-05-23 Sean Carlos http://www.antezeta.com/awstats.html +# added Google cache IP 66.102.9.104, 64.233.161.104 +# 2006-06-23 Sean Carlos http://www.antezeta.com/awstats.html +# added Alice Search search.alice.it +# added GoodSearch http://www.goodsearch.com/ (does not provide keywords) "a Yahoo-powered search engine that donates money to your favorite charity or school each time you search the web" +# added googlee.com, variant of Google +# added gotuneed http://www.gotuneed.com/ Italian search engine, in beta +# added icq.com +# added logic to parse Google Cache search keywords. Seems to work for alpha but not numeric cache IDs, i.e. search?q=cache:lWVLmnuGJswJ: is recognized but q=cache:Yv5qxeJNuhgJ: is not recognized. The URL triggering the keywords will also appear. The URLs are probably too varied to parse out? 
+# added Nusearch http://www.nusearch.com/ +# added Polymeta www.polymeta.hu (does not provide keywords) +# added scroogle http://www.scroogle.org/ (does not always provide keywords) +# added Tango http://tango.hu/search.php?st=0&q=jeles+napok +# Changed Google Cache notation 64\.233\.(161|167|179|183|187)\.104 to 64\.233\.1[0-9]{2}\.104 +# 72\.14\.(203|205|207|209|221)\.104 to 72\.14\.2[0-9]{2}\.104 +# 216\.239\.(51|59)\.104 to 216\.239\.5[0-9]\.104 +# 66\.102\.(7|9)\.104 to 66\.102\.[1-9]\.104 +# 2006-06-27 Sean Carlos http://www.antezeta.com/awstats.html +# added Onet.pl http://szukaj.onet.pl/ +# corrected name "Wirtualna Polska" from "Szukaj" (search); added link http://szukaj.wp.pl/ +# 2006-06-30 Sean Carlos http://www.antezeta.com/awstats.html +# Additional Polish Search Engines: +# added Dodaj.pl http://www.dodaj.pl/ +# added Gazeta.pl http://szukaj.gazeta.pl/ +# added Gery.pl http://szukaj.gery.pl/ +# added Hoga.pl http://www.hoga.pl/ +# added Interia.pl http://www.google.interia.pl/ +# added Katalog.Onet.pl http://katalog.onet.pl/ +# added NetSprint.pl http://www.netsprint.pl/ +# added o2.pl http://szukaj2.o2.pl/ +# added Polska http://szukaj.polska.pl/ +# added Szukacz http://www.szukacz.pl/ +# added Wow.pl http://szukaj.wow.pl/ +# added Sagool http://sagool.jp/ + +# 2006-08-25 Social Bookmarks +# International +# added del.icio.us/search - for now, just search referrer. To do: consider /tag/(tagname) referrer? +# added stumbleupon.com - No keywords supplied. +# added swik.net +# added digg. Keywords sometimes supplied. +# Italy +# added segnalo.alice.it - No keywords supplied. +# added ineffabile.it - No keywords supplied. + +# added filter for google groups. Attempt to parse group name as keyword. 
+ +# 2006-09-14 +# added Eniro Sverige http://www.eniro.se/ +# added MyWebSearch http://search.mywebsearch.com/ +# added Teecno http://www.teecno.it/ Italian Open Source Search Engine + +#package AWSSE; + +# 2006-09-25 (Gabor Moizes) +# added 4-counter (Google alternative) http://4-counter.com/ +# added Googlecom (Google alternative) http://googlecom.com/ +# added Goggle (Google alternative) http://goggle.co.hu/ +# added Comet toolbar http://as.starware.com +# added new IP for Yahoo: 216.109.125.130 +# added Ledix http://ledix.net/ +# added AT&T search (powered by Google) http://www.att.net/ +# added Keresolap (Hungarian search engine) http://www.keresolap.hu/ +# added Mozbot (French search engine) http://www.mozbot.fr/ +# added Zoznam (Slovak search engine) http://www.zoznam.sk/ +# added sapo.pt (Portuguese search engine) http://www.sapo.pt/ +# added shaw.ca (powered by Google) http://start.shaw.ca/ +# added Searchalot http://www.searchalot.com/ +# added Copernic http://www.copernic.com/ +# added 216.109.125.130 to Yahoo +# added 66.218.69.11 to Yahoo +# added Avantfind http://www.avantfind.com/ +# added Steadysearch http://www.steadysearch.com/ +# added Steadysearch http://www.steady-search.com/ +# modified 216\.239\.5[0-9]\.104/search to 216\.239\.5[0-9]\.104 + + +# SearchEnginesSearchIDOrder +# It contains all matching criteria to search for in log fields. This list is +# used to know in which order to search Search Engines IDs. 
+# Most frequent one are in list1, used when LevelForSearchEnginesDetection is 1 or more +# Minor robots are in list2, used when LevelForSearchEnginesDetection is 2 or more +# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_' +#------------------------------------------------------------------------------ +@SearchEnginesSearchIDOrder_list1=( +# Major international search engines +'base\.google\.', +'froogle\.google\.', +'groups\.google\.', +'images\.google\.', +'google\.', +'googlee\.', +'googlecom\.com', +'goggle\.co\.hu', +'216\.239\.(35|37|39|51)\.100', +'216\.239\.(35|37|39|51)\.101', +'216\.239\.5[0-9]\.104', +'64\.233\.1[0-9]{2}\.104', +'66\.102\.[1-9]\.104', +'66\.249\.93\.104', +'72\.14\.2[0-9]{2}\.104', +'msn\.', +'live\.com', +'voila\.', +'mindset\.research\.yahoo', +'yahoo\.','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)', +'search\.aol\.co', +'tiscali\.', +'lycos\.', +'alexa\.com', +'alltheweb\.com', +'altavista\.', +'a9\.com', +'dmoz\.org', +'netscape\.', +'search\.terra\.', +'www\.search\.com', +'search\.sli\.sympatico\.ca', +'excite\.' +); + +@SearchEnginesSearchIDOrder_list2=( +# Minor international search engines +'4\-counter\.com', +'att\.net', +'northernlight\.', +'hotbot\.', +'kvasir\.', +'webcrawler\.', +'metacrawler\.', +'go2net\.com', +'(^|\.)go\.com', +'euroseek\.', +'looksmart\.', +'spray\.', +'nbci\.com\/search', +'de\.ask.\com', # break out Ask country specific engines. 
(.jp is in Japan section) +'es\.ask.\com', +'fr\.ask.\com', +'it\.ask.\com', +'nl\.ask.\com', +'uk\.ask.\com', +'(^|\.)ask\.com', +'atomz\.', +'overture\.com', # Replace 'goto\.com','Goto.com', +'teoma\.', +'findarticles\.com', +'infospace\.com', +'mamma\.', +'dejanews\.', +'dogpile\.com', +'wisenut\.com', +'ixquick\.com', +'search\.earthlink\.net', +'i-une\.com', +'blingo\.com', +'centraldatabase\.org', +'clusty\.com', +'mysearch\.', +'vivisimo\.com', +'kartoo\.com', +'icerocket\.com', +'sphere\.com', +'ledix\.net', +'start\.shaw\.ca', +'searchalot\.com', +'copernic\.com', +'avantfind\.com', +'steadysearch\.com', +'steady-search\.com', +# Chello Portals +'chello\.at', +'chello\.be', +'chello\.cz', +'chello\.fr', +'chello\.hu', +'chello\.nl', +'chello\.no', +'chello\.pl', +'chello\.se', +'chello\.sk', +'chello', # required as catchall for new countries not yet known +# Mirago +'mirago\.be', +'mirago\.ch', +'mirago\.de', +'mirago\.dk', +'es\.mirago\.com', +'mirago\.fr', +'mirago\.it', +'mirago\.nl', +'no\.mirago\.com', +'mirago\.se', +'mirago\.co\.uk', +'mirago', # required as catchall for new countries not yet known +'answerbus\.com', +'icq\.com\/search', +'nusearch\.com', +'goodsearch\.com', +'scroogle\.org', +'questionanswering\.com', +'mywebsearch\.com', +'as\.starware\.com', +# Social Bookmarking Services +'del\.icio\.us', +'digg\.com', +'stumbleupon\.com', +'swik\.net', +'segnalo\.alice\.it', +'ineffabile\.it', +# Minor Australian search engines +'anzwers\.com\.au', +# Minor brazilian search engines +'engine\.exe', 'miner\.bol\.com\.br', +# Minor chinese search engines +'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com', +# Minor czech search engines +'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz', +# Minor danish search-engines +'opasia\.dk', 'danielsen\.com', 'sol\.dk', 'jubii\.dk', 'find\.dk', 'edderkoppen\.dk', 'netstjernen\.dk', 'orbis\.dk', 'tyfon\.dk', '1klik\.dk', 'ofir\.dk', +# Minor dutch 
search engines +'ilse\.','vindex\.', +# Minor english search engines +'(^|\.)ask\.co\.uk','bbc\.co\.uk/cgi-bin/search','ifind\.freeserve','looksmart\.co\.uk','splut\.','spotjockey\.','ukdirectory\.','ukindex\.co\.uk','ukplus\.','searchy\.co\.uk', +# Minor finnish search engines +'haku\.www\.fi', +# Minor french search engines +'recherche\.aol\.fr','ctrouve\.','francite\.','\.lbb\.org','rechercher\.libertysurf\.fr', 'search[\w\-]+\.free\.fr', 'recherche\.club-internet\.fr', +'toile\.com', 'biglotron\.com', +'mozbot\.fr', +# Minor german search engines +'sucheaol\.aol\.de', +'fireball\.de','infoseek\.de','suche\d?\.web\.de','[a-z]serv\.rrzn\.uni-hannover\.de', +'suchen\.abacho\.de','brisbane\.t-online\.de','allesklar\.de','meinestadt\.de', +'212\.227\.33\.241', +'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)', +'wwweasel\.de', +'netluchs\.de', +'schoenerbrausen\.de', +# Minor Hungarian search engines +'heureka\.hu','vizsla\.origo\.hu','lapkereso\.hu','goliat\.hu','index\.hu','wahoo\.hu','webmania\.hu','search\.internetto\.hu', +'tango\.hu', +'keresolap\.hu', +'polymeta\.hu', +# Minor Indian search engines +'sify\.com', +# Minor Italian search engines +'virgilio\.it','arianna\.libero\.it','supereva\.com','kataweb\.it','search\.alice\.it\.master','search\.alice\.it','gotuneed\.com', +'godado','jumpy\.it','shinyseek\.it','teecno\.it', +# Minor Japanese search engines +'ask\.jp','sagool\.jp', +# Minor Norwegian search engines +'sok\.start\.no', 'eniro\.no', +# Minor Polish search engines +'szukaj\.wp\.pl','szukaj\.onet\.pl','dodaj\.pl','gazeta\.pl','gery\.pl','hoga\.pl','netsprint\.pl','interia\.pl','katalog\.onet\.pl','o2\.pl','polska\.pl','szukacz\.pl','wow\.pl', +# Minor russian search engines +'ya(ndex)?\.ru', 'aport\.ru', 'rambler\.ru', 'turtle\.ru', 'metabot\.ru', +# Minor Swedish search engines +'evreka\.passagen\.se','eniro\.se', +# Minor 
Slovak search engines +'zoznam\.sk', +# Minor Portuguese search engines +'sapo\.pt', +# Minor swiss search engines +'search\.ch', 'search\.bluewin\.ch' +); +@SearchEnginesSearchIDOrder_listgen=( +# Generic search engines +'search\..*\.\w+' +); + + +# NotSearchEnginesKeys +# If a search engine key is found, we check its exclude list to know if it's +# really a search engine +#------------------------------------------------------------------------------ +%NotSearchEnginesKeys=( +'altavista\.'=>'babelfish\.altavista\.', +'google\.'=>'mail\.google\.', +'google\.'=>'translate\.google\.', +'msn\.'=>'hotmail\.msn\.', +'tiscali\.'=>'mail\.tiscali\.', +'yahoo\.'=>'mail\.yahoo\.' +); + + +# SearchEnginesHashID +# Each Search Engine Search ID is associated to an AWStats id string +#------------------------------------------------------------------------------ +%SearchEnginesHashID = ( +# Major international search engines +'base\.google\.','google_base', +'froogle\.google\.','google_froogle', +'groups\.google\.','google_groups', +'images\.google\.','google_image', +'google\.','google', +'googlee\.','google', +'googlecom\.com','google', +'goggle\.co\.hu','google', +'216\.239\.(35|37|39|51)\.100','google_cache', +'216\.239\.(35|37|39|51)\.101','google_cache', +'216\.239\.5[0-9]\.104','google_cache', +'64\.233\.1[0-9]{2}\.104','google_cache', +'66\.102\.[1-9]\.104','google_cache', +'66\.249\.93\.104','google_cache', +'72\.14\.2[0-9]{2}\.104','google_cache', +'msn\.','msn', +'live\.com','live', +'voila\.','voila', +'mindset\.research\.yahoo','yahoo_mindset', +'yahoo\.','yahoo','(66\.218\.71\.225|216\.109\.117\.135|216\.109\.125\.130|66\.218\.69\.11)','yahoo', +'lycos\.','lycos', +'alexa\.com','alexa', +'alltheweb\.com','alltheweb', +'altavista\.','altavista', +'a9\.com','a9', +'dmoz\.org','dmoz', +'netscape\.','netscape', +'search\.terra\.','terra', +'www\.search\.com','search.com', +'tiscali\.','tiscali', +'search\.aol\.co','aol', +'search\.sli\.sympatico\.ca','sympatico', 
+'excite\.','excite', +# Minor international search engines +'4\-counter\.com','google4counter', +'att\.net','att', +'northernlight\.','northernlight', +'hotbot\.','hotbot', +'kvasir\.','kvasir', +'webcrawler\.','webcrawler', +'metacrawler\.','metacrawler', +'go2net\.com','go2net', +'(^|\.)go\.com','go', +'euroseek\.','euroseek', +'looksmart\.','looksmart', +'spray\.','spray', +'nbci\.com\/search','nbci', +'de\.ask.\com','askde', # break out Ask country specific engines. +'es\.ask.\com','askes', +'fr\.ask.\com','askfr', +'it\.ask.\com','askit', +'nl\.ask.\com','asknl', +'uk\.ask.\com','askuk', +'(^|\.)ask\.co\.uk','askuk', +'(^|\.)ask\.com','ask', +'atomz\.','atomz', +'overture\.com','overture', # Replace 'goto\.com','Goto.com', +'teoma\.','teoma', +'findarticles\.com','findarticles', +'infospace\.com','infospace', +'mamma\.','mamma', +'dejanews\.','dejanews', +'dogpile\.com','dogpile', +'wisenut\.com','wisenut', +'ixquick\.com','ixquick', +'search\.earthlink\.net','earthlink', +'i-une\.com','iune', +'blingo\.com','blingo', +'centraldatabase\.org','centraldatabase', +'clusty\.com','clusty', +'mysearch\.','mysearch', +'vivisimo\.com','vivisimo', +'kartoo\.com','kartoo', +'icerocket\.com','icerocket', +'sphere\.com','sphere', +'ledix\.net','ledix', +'start\.shaw\.ca','shawca', +'searchalot\.com','searchalot', +'copernic\.com','copernic', +'avantfind\.com','avantfind', +'steadysearch\.com','steadysearch', +'steady-search\.com','steadysearch', +# Chello Portals +'chello\.at','chelloat', +'chello\.be','chellobe', +'chello\.cz','chellocz', +'chello\.fr','chellofr', +'chello\.hu','chellohu', +'chello\.nl','chellonl', +'chello\.no','chellono', +'chello\.pl','chellopl', +'chello\.se','chellose', +'chello\.sk','chellosk', +'chello','chellocom', +# Mirago +'mirago\.be','miragobe', +'mirago\.ch','miragoch', +'mirago\.de','miragode', +'mirago\.dk','miragodk', +'es\.mirago\.com','miragoes', +'mirago\.fr','miragofr', +'mirago\.it','miragoit', +'mirago\.nl','miragonl', 
+'no\.mirago\.com','miragono', +'mirago\.se','miragose', +'mirago\.co\.uk','miragocouk', +'mirago','mirago', # required as catchall for new countries not yet known +'answerbus\.com','answerbus', +'icq\.com\/search','icq', +'nusearch\.com','nusearch', +'goodsearch\.com','goodsearch', +'scroogle\.org','scroogle', +'questionanswering\.com','questionanswering', +'mywebsearch\.com','mywebsearch', +'as\.starware\.com','comettoolbar', +# Social Bookmarking Services +'del\.icio\.us','delicious', +'digg\.com','digg', +'stumbleupon\.com','stumbleupon', +'swik\.net','swik', +'segnalo\.alice\.it','segnalo', +'ineffabile\.it','ineffabile', +# Minor Australian search engines +'anzwers\.com\.au','anzwers', +# Minor brazilian search engines +'engine\.exe','engine', +'miner\.bol\.com\.br','miner', +# Minor chinese search engines +'baidu\.com','baidu', +'search\.sina\.com','sina', +'search\.sohu\.com','sohu', +'sogou\.com','sogou', +# Minor czech search engines +'atlas\.cz','atlas', +'seznam\.cz','seznam', +'quick\.cz','quick', +'centrum\.cz','centrum', +'jyxo\.(cz|com)','jyxo', +'najdi\.to','najdi', +'redbox\.cz','redbox', +# Minor danish search-engines +'opasia\.dk','opasia', +'danielsen\.com','danielsen', +'sol\.dk','sol', +'jubii\.dk','jubii', +'find\.dk','finddk', +'edderkoppen\.dk','edderkoppen', +'netstjernen\.dk','netstjernen', +'orbis\.dk','orbis', +'tyfon\.dk','tyfon', +'1klik\.dk','1klik', +'ofir\.dk','ofir', +# Minor dutch search engines +'ilse\.','ilse', +'vindex\.','vindex', +# Minor english search engines +'bbc\.co\.uk/cgi-bin/search','bbc', +'ifind\.freeserve','freeserve', +'looksmart\.co\.uk','looksmartuk', +'splut\.','splut', +'spotjockey\.','spotjockey', +'ukdirectory\.','ukdirectory', +'ukindex\.co\.uk','ukindex', +'ukplus\.','ukplus', +'searchy\.co\.uk','searchy', +# Minor finnish search engines +'haku\.www\.fi','haku', +# Minor french search engines +'recherche\.aol\.fr','aolfr', +'ctrouve\.','ctrouve', +'francite\.','francite', +'\.lbb\.org','lbb', 
+'rechercher\.libertysurf\.fr','libertysurf', +'search[\w\-]+\.free\.fr','free', +'recherche\.club-internet\.fr','clubinternet', +'toile\.com','toile', +'biglotron\.com', 'biglotron', +'mozbot\.fr', 'mozbot', +# Minor german search engines +'sucheaol\.aol\.de','aolde', +'fireball\.de','fireball', +'infoseek\.de','infoseek', +'suche\d?\.web\.de','webde', +'[a-z]serv\.rrzn\.uni-hannover\.de','meta', +'suchen\.abacho\.de','abacho', +'brisbane\.t-online\.de','t-online', +'allesklar\.de','allesklar', +'meinestadt\.de','meinestadt', +'212\.227\.33\.241','metaspinner', +'(161\.58\.227\.204|161\.58\.247\.101|212\.40\.165\.90|213\.133\.108\.202|217\.160\.108\.151|217\.160\.111\.99|217\.160\.131\.108|217\.160\.142\.227|217\.160\.176\.42)','metacrawler_de', +'wwweasel\.de','wwweasel', +'netluchs\.de','netluchs', +'schoenerbrausen\.de','schoenerbrausen', +# Minor Hungarian search engines +'heureka\.hu','heureka', +'vizsla\.origo\.hu','origo', +'lapkereso\.hu','lapkereso', +'goliat\.hu','goliat', +'index\.hu','indexhu', +'wahoo\.hu','wahoo', +'webmania\.hu','webmania', +'search\.internetto\.hu','internetto', +'tango\.hu','tango_hu', +'keresolap\.hu','keresolap_hu', +'polymeta\.hu','polymeta_hu', +# Minor Indian search engines +'sify\.com','sify', +# Minor Italian search engines +'virgilio\.it','virgilio', +'arianna\.libero\.it','arianna', +'supereva\.com','supereva', +'kataweb\.it','kataweb', +'search\.alice\.it\.master','aliceitmaster', +'search\.alice\.it','aliceit', +'gotuneed\.com','gotuneed', +'godado','godado', +'jumpy\.it','jumpy\.it', +'shinyseek\.it','shinyseek\.it', +'teecno\.it','teecnoit', +# Minor Japanese search engines +'ask\.jp','askjp', +'sagool\.jp','sagool', +# Minor Norwegian search engines +'sok\.start\.no','start', 'eniro\.no','eniro', +# Minor Polish search engines +'szukaj\.wp\.pl','wp', +'szukaj\.onet\.pl','onetpl', +'dodaj\.pl','dodajpl', +'gazeta\.pl','gazetapl', +'gery\.pl','gerypl', +'netsprint\.pl\/hoga\-search','hogapl', 
+'netsprint\.pl','netsprintpl', +'interia\.pl','interiapl', +'katalog\.onet\.pl','katalogonetpl', +'o2\.pl','o2pl', +'polska\.pl','polskapl', +'szukacz\.pl','szukaczpl', +'wow\.pl','wowpl', +# Minor russian search engines +'ya(ndex)?\.ru','yandex', +'aport\.ru','aport', +'rambler\.ru','rambler', +'turtle\.ru','turtle', +'metabot\.ru','metabot', +# Minor Swedish search engines +'evreka\.passagen\.se','passagen', +'eniro\.se','enirose', +# Minor Slovak search engines +'zoznam\.sk','zoznam', +# Minor Portuguese search engines +'sapo\.pt','sapo', +# Minor swiss search engines +'search\.ch','searchch', +'search\.bluewin\.ch','bluewin', +# Generic search engines +'search\..*\.\w+','search' +); + + +# SearchEnginesWithKeysNotInQuery +# List of search engines that store keyword as page instead of query parameter +#------------------------------------------------------------------------------ +%SearchEnginesWithKeysNotInQuery=( +'a9',1 # www.a9.com/searckey1%20searchkey2 +); + +# SearchEnginesKnownUrl +# Known rules to extract keywords from a referrer search engine URL +#------------------------------------------------------------------------------ +%SearchEnginesKnownUrl=( +# Most common search engines +'alexa','q=', +'alltheweb','q(|uery)=', +'altavista','q=', +'a9','a9\.com\/', +'dmoz','search=', +'google_base','(p|q|as_p|as_q)=', +'google_froogle','(p|q|as_p|as_q)=', +'google_groups','group\/', # does not work +'google_image','(p|q|as_p|as_q)=', +'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:', +'google','(p|q|as_p|as_q)=', +'lycos','query=', +'msn','q=', +'live','q=', +'netscape','search=', +'tiscali','key=', +'aol','query=', +'terra','query=', +'voila','(kw|rdata)=', +'search.com','q=', +'yahoo_mindset','p=', +'yahoo','p=', +'sympatico', 'query=', +'excite','search=', +# Minor international search engines +'google4counter','(p|q|as_p|as_q)=', +'att','qry=', +'go','qt=', +'askde','(ask|q)=', # break out Ask country specific engines. 
+'askes','(ask|q)=', +'askfr','(ask|q)=', +'askit','(ask|q)=', +'asknl','(ask|q)=', +'ask','(ask|q)=', +'atomz','sp-q=', +'euroseek','query=', +'findarticles','key=', +'go2net','general=', +'hotbot','mt=', +'infospace','qkw=', +'kvasir', 'q=', +'looksmart','key=', +'mamma','query=', +'metacrawler','general=', +'nbci','keyword=', +'northernlight','qr=', +'overture','keywords=', +'dogpile', 'q(|kw)=', +'spray','string=', +'teoma','q=', +'webcrawler','searchText=', +'wisenut','query=', +'ixquick', 'query=', +'earthlink', 'q=', +'iune','(keywords|q)=', +'blingo','q=', +'centraldatabase','query=', +'clusty','query=', +'mysearch','searchfor=', +'vivisimo','query=', +# kartoo: No keywords passed in referring URL. +'kartoo',, +'icerocket','q=', +'sphere','q=', +'ledix','q=', +'shawca','q=', +'searchalot','q=', +'copernic','web\/', +'avantfind','keywords=', +'steadysearch','w=', +# Chello Portals +'chelloat','q1=', +'chellobe','q1=', +'chellocz','q1=', +'chellofr','q1=', +'chellohu','q1=', +'chellonl','q1=', +'chellono','q1=', +'chellopl','q1=', +'chellose','q1=', +'chellosk','q1=', +'chellocom','q1=', +# Mirago +'miragobe','(txtsearch|qry)=', +'miragoch','(txtsearch|qry)=', +'miragode','(txtsearch|qry)=', +'miragodk','(txtsearch|qry)=', +'miragoes','(txtsearch|qry)=', +'miragofr','(txtsearch|qry)=', +'miragoit','(txtsearch|qry)=', +'miragonl','(txtsearch|qry)=', +'miragono','(txtsearch|qry)=', +'miragose','(txtsearch|qry)=', +'miragocouk','(txtsearch|qry)=', +'mirago','(txtsearch|qry)=', +'answerbus','', # Does not provide query parameters +'icq','q=', +'nusearch','nusearch_terms=', +'goodsearch','Keywords=', +'scroogle','Gw=', # Does not always provide query parameters +'questionanswering','', +'mywebsearch','searchfor=', +'comettoolbar','qry=', +# Social Bookmarking Services +'delicious','all=', +'digg','s=', +'stumbleupon','', +'swik','swik\.net/', # does not work. Keywords follow domain, e.g. 
http://swik.net/awstats+analytics +'segnalo','', +'ineffabile','', +# Minor Australian search engines +'anzwers','search=', +# Minor brazilian search engines +'engine','p1=', 'miner','q=', +# Minor chinese search engines +'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=', +# Minor czech search engines +'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=', +# Minor danish search engines +'opasia','q=', 'danielsen','q=', 'sol','q=', 'jubii','soegeord=', 'finddk','words=', 'edderkoppen','query=', 'orbis','search_field=', '1klik','query=', 'ofir','querytext=', +# Minor dutch search engines +'ilse','search_for=', 'vindex','in=', +# Minor english search engines +'askuk','(ask|q)=', 'bbc','q=', 'freeserve','q=', 'looksmartuk','key=', +'splut','pattern=', 'spotjockey','Search_Keyword=', 'ukindex', 'stext=', 'ukdirectory','k=', 'ukplus','search=', 'searchy', 'search_term=', +# Minor finnish search engines +'haku','w=', +# Minor french search engines +'francite','name=', 'clubinternet', 'q=', +'toile', 'q=', +'biglotron','question=', +'mozbot','q=', +# Minor german search engines +'aolde','q=', +'fireball','q=', 'infoseek','qt=', 'webde','su=', +'abacho','q=', 't-online','q=', +'metaspinner','qry=', +'metacrawler_de','qry=', +'wwweasel','q=', +'netluchs','query=', +'schoenerbrausen','q=', +# Minor Hungarian search engines +'heureka','heureka=', 'origo','(q|search)=', 'goliat','KERESES=', 'wahoo','q=', 'internetto','searchstr=', +'keresolap_hu','q=', +'tango_hu','q=', +'polymeta_hu','', +# Minor Indian search engines +'sify','keyword=', +# Minor Italian search engines +'virgilio','qs=', +'arianna','query=', +'supereva','q=', +'kataweb','q=', +'aliceitmaster','qs=', +'aliceit','qs=', +'gotuneed','', # Not yet known +'godado','Keywords=', +'jumpy\.it','searchWord=', +'shinyseek\.it','KEY=', +'teecnoit','q=', +# Minor Japanese search engines +'askjp','(ask|q)=', +'sagool','q=', +# Minor 
Norwegian search engines +'start','q=', 'eniro','q=', +# Minor Polish search engines +'wp','szukaj=', +'onetpl','qt=', +'dodajpl','keyword=', +'gazetapl','slowo=', +'gerypl','q=', +'hogapl','qt=', +'netsprintpl','q=', +'interiapl','q=', +'katalogonetpl','qt=', +'o2pl','qt=', +'polskapl','qt=', +'szukaczpl','q=', +'wowpl','q=', +# Minor russian search engines +'yandex', 'text=', 'rambler','words=', 'aport', 'r=', 'metabot', 'st=', +# Minor swedish search engines +'passagen','q=', +'enirose','q=', +# Minor swiss search engines +'searchch', 'q=', 'bluewin', 'qry=' +); + +# SearchEnginesKnownUrlNotFound +# Known rules to extract not found keywords from a referrer search engine URL +#------------------------------------------------------------------------------ +%SearchEnginesKnownUrlNotFound=( +# Most common search engines +'msn','origq=' +); + +# If no rules are known, WordsToExtractSearchUrl will be used to search keyword parameter +# If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters. 
+#------------------------------------------------------------------------------ +@WordsToExtractSearchUrl= ('ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w='); +@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); + +# SearchEnginesKnownUTFCoding +# Known parameter that proves a search engine has coded its parameters in UTF-8 +#------------------------------------------------------------------------------ +%SearchEnginesKnownUTFCoding=( +# Most common search engines +'google','ie=utf-8', +'alltheweb','cs=utf-8' +); + + +# SearchEnginesHashLib +# List of search engines names +# 'search_engine_id', 'search_engine_name', +#------------------------------------------------------------------------------ +%SearchEnginesHashLib=( +# Major international search engines +'alexa','Alexa', +'alltheweb','AllTheWeb', +'altavista','AltaVista', +'a9', 'A9', +'dmoz','DMOZ', +'google_base','Google (Base)', +'google_froogle','Froogle (Google)', +'google_groups','Google (Groups)', +'google_image','Google (Images)', +'google_cache','Google (cache)', +'google','Google', +'lycos','Lycos', +'msn','MSN Search', +'live','Windows Live', +'netscape','Netscape', +'aol','AOL', +'terra','Terra', +'tiscali','Tiscali', +'voila','Voila', +'search.com','Search.com', +'yahoo_mindset','Yahoo! 
Mindset', +'yahoo','Yahoo!', +'sympatico','Sympatico', +'excite','Excite', +# Minor international search engines +'google4counter','4-counter (Google)', +'att','AT&T search (powered by Google)', +'go','Go.com', +'askde','Ask Deutschland', +'askes','Ask España', # break out Ask country specific engines. +'askfr','Ask France', +'askit','Ask Italia', +'asknl','Ask Nederland', +'ask','Ask', +'atomz','Atomz', +'dejanews','DejaNews', +'euroseek','Euroseek', +'findarticles','Find Articles', +'go2net','Go2Net (Metamoteur)', +'hotbot','Hotbot', +'infospace','InfoSpace', +'kvasir','Kvasir', +'looksmart','Looksmart', +'mamma','Mamma', +'metacrawler','MetaCrawler (Metamoteur)', +'nbci','NBCI', +'northernlight','NorthernLight', +'overture','Overture', # Replace 'goto\.com','Goto.com', +'dogpile','Dogpile', +'spray','Spray', +'teoma','Teoma', # Replace 'directhit\.com','DirectHit', +'webcrawler','WebCrawler', +'wisenut','WISENut', +'ixquick','ix quick', +'earthlink', 'Earth Link', +'iune','i-une', +'blingo','Blingo', +'centraldatabase','GPU p2p search', +'clusty','Clusty', +'mysearch','My Search', +'vivisimo','Vivisimo', +'kartoo','Kartoo', +'icerocket','Icerocket (Blog)', +'sphere','Sphere (Blog)', +'ledix','Ledix', +'shawca','Shaw.ca', +'searchalot','Searchalot', +'copernic','Copernic', +'avantfind','Avantfind', +'steadysearch','Avantfind', +# Chello Portals +'chelloat','Chello Austria', +'chellobe','Chello Belgium', +'chellocz','Chello Czech Republic', +'chellofr','Chello France', +'chellohu','Chello Hungary', +'chellonl','Chello Netherlands', +'chellono','Chello Norway', +'chellopl','Chello Poland', +'chellose','Chello Sweden', +'chellosk','Chello Slovakia', +'chellocom','Chello (Country not recognized)', +# Mirago +'miragobe','Mirago Belgium', +'miragoch','Mirago Switzerland', +'miragode','Mirago Germany', +'miragodk','Mirago Denmark', +'miragoes','Mirago Spain', +'miragofr','Mirago France', +'miragoit','Mirago Italy', +'miragonl','Mirago Netherlands', +'miragono','Mirago 
Norway', +'miragose','Mirago Sweden', +'miragocouk','Mirago UK', +'mirago','Mirago (country unknown)', +'answerbus','Answerbus', +'icq','icq', +'nusearch','Nusearch', +'goodsearch','GoodSearch', +'scroogle','Scroogle', +'questionanswering','Questionanswering', +'mywebsearch','MyWebSearch', +'comettoolbar','Comet toolbar search', +# Social Bookmarking Services +'delicious','del.icio.us (Social Bookmark)', +'digg','Digg (Social Bookmark)', +'stumbleupon','Stumbleupon (Social Bookmark)', +'swik','Swik (Social Bookmark)', +'segnalo','Segnalo (Social Bookmark)', +'ineffabile','Ineffabile.it (Social Bookmark)', +# Minor Australian search engines +'anzwers','anzwers.com.au', +# Minor brazilian search engines +'engine','Cade', 'miner','Meta Miner', +# Minor chinese search engines +'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', 'sogou','Sogou', +# Minor czech search engines +'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz', +# Minor danish search-engines +'opasia','Opasia', 'danielsen','Thor (danielsen.com)', 'sol','SOL', 'jubii','Jubii', 'finddk','Find', 'edderkoppen','Edderkoppen', 'netstjernen','Netstjernen', 'orbis','Orbis', 'tyfon','Tyfon', '1klik','1Klik', 'ofir','Ofir', +# Minor dutch search engines +'ilse','Ilse','vindex','Vindex\.nl', +# Minor english search engines +'askuk','Ask UK', +'bbc','BBC', 'freeserve','Freeserve', 'looksmartuk','Looksmart UK', +'splut','Splut', 'spotjockey','Spotjockey', 'ukdirectory','UK Directory', 'ukindex','UKIndex', 'ukplus','UK Plus', 'searchy','searchy.co.uk', +# Minor finnish search engines +'haku','Ihmemaa', +# Minor french search engines +'aolfr','AOL (fr)', 'ctrouve','C\'est trouvé', 'francite','Francité', 'lbb', 'LBB', 'libertysurf', 'Libertysurf', 'free', 'Free.fr', 'clubinternet', 'Club-internet', +'toile', 'Toile du Québec', +'biglotron','Biglotron', +'mozbot','Mozbot', +# Minor German search engines +'aolde','AOL (de)', 
+'fireball','Fireball', 'infoseek','Infoseek', 'webde','Web.de', +'abacho','Abacho', 't-online','T-Online', +'allesklar','allesklar.de', 'meinestadt','meinestadt.de', +'metaspinner','metaspinner', +'metacrawler_de','metacrawler.de', +'wwweasel','WWWeasel', +'netluchs','Netluchs', +'schoenerbrausen','Schoenerbrausen/', +# Minor hungarian search engines +'heureka','Heureka', 'origo','Origo-Vizsla', 'lapkereso','Startlapkeresõ', 'goliat','Góliát', 'indexhu','Index', 'wahoo','Wahoo', 'webmania','webmania.hu', 'internetto','Internetto Keresõ', +'tango_hu','Tango', +'keresolap_hu','Tango keresolap', +'polymeta_hu','Polymeta', +# Minor Indian search engines +'sify','Sify', +# Minor Italian search engines +'virgilio','Virgilio', +'arianna','Arianna', +'supereva','Supereva', +'kataweb','Kataweb', +'aliceitmaster','search.alice.it.master', +'aliceit','alice.it', +'gotuneed','got u need', +'godado','Godado.it', +'jumpy\.it','Jumpy.it', +'shinyseek\.it','Shinyseek.it', +'teecnoit','Teecno', +# Minor Japanese search engines +'askjp','Ask Japan', +'sagool','Sagool', +# Minor Norwegian search engines +'start','start.no', 'eniro','Eniro', +# Minor polish search engines +'wp','Wirtualna Polska', +'onetpl','Onet.pl', +'dodajpl','Dodaj.pl', +'gazetapl','Gazeta.pl', +'gerypl','Gery.pl', +'hogapl','Hoga.pl', +'netsprintpl','NetSprint.pl', +'interiapl','Interia.pl', +'katalogonetpl','Katalog.Onet.pl', +'o2pl','o2.pl', +'polskapl','Polska', +'szukaczpl','Szukacz', +'wowpl','Wow.pl', +# Minor russian search engines +'yandex', 'Yandex', 'aport', 'Aport', 'rambler', 'Rambler', 'turtle', 'Turtle', 'metabot', 'MetaBot', +# Minor Swedish search engines +'passagen','Evreka', +'enirose','Eniro Sverige', +# Minor Slovak search engines +'zoznam','Zoznam', +# Minor Portuguese search engines +'sapo','Sapo', +# Minor Swiss search engines +'searchch', 'search.ch', 'bluewin', 'search.bluewin.ch', +# Generic search engines +'search','Unknown search engines' +); + + +# Sanity check. 
+# Enable this code and run perl search_engines.pm to check file entries are ok +#----------------------------------------------------------------------------- +#foreach my $key (@SearchEnginesSearchIDOrder_list1) { +# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list1 with no value in SearchEnginesHashID"); +# foreach my $key2 (@SearchEnginesSearchIDOrder_list2) { if ($key2 eq $key) { error("$key is in 1 and 2\n"); } } +# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 1 and gen\n"); } } +#} } +#foreach my $key (@SearchEnginesSearchIDOrder_list2) { +# if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_list2 with no value in SearchEnginesHashID"); +# foreach my $key2 (@SearchEnginesSearchIDOrder_list1) { if ($key2 eq $key) { error("$key is in 2 and 1\n"); } } +# foreach my $key2 (@SearchEnginesSearchIDOrder_listgen) { if ($key2 eq $key) { error("$key is in 2 and gen\n"); } } +#} } +#foreach my $key (@SearchEnginesSearchIDOrder_listgen) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in SearchEnginesSearchIDOrder_listgen with no value in SearchEnginesHashID"); } } +#foreach my $key (keys %NotSearchEnginesKeys) { if (! $SearchEnginesHashID{$key}) { error("Entry '$key' has been found in NotSearchEnginesKeys with no value in SearchEnginesHashID"); } } +#foreach my $key (keys %SearchEnginesKnownUrl) { +# my $found=0; +# foreach my $key2 (values %SearchEnginesHashID) { +# if ($key eq $key2) { $found=1; last; } +# } +# if (! $found) { die "Entry '$key' has been found in SearchEnginesKnownUrl with no value in SearchEnginesHashID"; } +#} +#foreach my $key (keys %SearchEnginesHashLib) { +# my $found=0; +# foreach my $key2 (values %SearchEnginesHashID) { +# if ($key eq $key2) { $found=1; last; } +# } +# if (! 
$found) { die "Entry '$key' has been found in SearchEnginesHashLib with no value in SearchEnginesHashID"; } +#} +#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen; + +1;