From: eldy <>
Date: Fri, 10 Aug 2007 21:01:42 +0000 (+0000)
Subject: AWStats - Bugs - 6 items
X-Git-Tag: AWSTATS_6_8_BETA~58
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=89ca43ca753717c32a96f6fc536a4c457dcf041b;p=thirdparty%2FAWStats.git
AWStats - Bugs - 6 items
775988 The latest: minor Chinese search engine patch
1735647 Chinese search engines for awstats 6.6
1735646 robots patch: feedsky, contentmatch crawler, twiceler, yodao
1735639 Browser patch for Lilina/potu reader
1735637 Chinese translation file for awstats 6.6
1533028 WordToCleanSearchUrl for baidu.com
AWStats - Patches - 6 items
1384243 minor Chinese spider and search engine patch
1569151 TOP 8 Chinese local search engines
745359 Chinese(Simp) update: 6.5 awstats-cn.txt
1569201 top Chinese browser and robot update: TencentTraveler (TT) is not a robot
1569229 Simplified Chinese language file update
1569208 Browser update on potu rss reader and lilina rss reader
AWStats - Feature Requests - 5 items
1384245 exclude referer spam by request type is HEAD
881891 ShowMonthStats: last 12 months vs first month of this year
863298 Counter of spider access on URL-Pages report.
1124526 Meet referer spam: need nofollow
706297 smart IIS timezone solution
---
diff --git a/wwwroot/cgi-bin/lang/awstats-cn.txt b/wwwroot/cgi-bin/lang/awstats-cn.txt
index 10484157..8b94a237 100644
--- a/wwwroot/cgi-bin/lang/awstats-cn.txt
+++ b/wwwroot/cgi-bin/lang/awstats-cn.txt
@@ -1,11 +1,11 @@
-# Chinese (simplified) message file
+# Chinese (simplified) message file (by Che Dong chedongATgmail.com)
# $Revision$ - $Date$
PageCode=gb2312
message0=ÎÞ·¨µÃÖª
message1=ÎÞ·¨µÃÖª(²»ÄÜ·´½âÍøÓòÃû³Æ)
message2=ÆäËû
message3=²é¿´Ïêϸ×ÊÁÏ
-message4=ÐÇÆÚ
+message4=ÈÕÆÚ
message5=ÔÂ
message6=Äê
message7=ͳ¼ÆÍøÕ¾
@@ -25,7 +25,7 @@ message20=ÿСʱ
message21=ä¯ÀÀÆ÷
message22=HTTP ´íÎó
message23=·´ÏàÁ´½Ó
-message24=´Óδ¸üÐÂ
+message24=´Óδ¸üУ¨²Î¿¼ awstats_setup.htmlÉ쵀 'Build/Update'£©
message25=²Î¹ÛÕßµÄÍøÓò»ò¹ú¼Ò
message26=Ö÷»úÊý
message27=ÍøÒ³Êý
@@ -47,7 +47,7 @@ message42=
message43=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö¾ä
message44=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö´Ê
message45=ÎÞ·¨·´½âÒëµÄIPµØÖ·
-message46=ÎÞ·¨µÃÖªµÄ×÷ҵϵͳ
+message46=ÎÞ·¨µÃÖªµÄ²Ù×÷ϵͳ
message47=ÕÒ²»µ½µÄÍøÖ·Á¬½á (HTTP ´íÎóÂë 404)
message48=IP µØÖ·
message49=´íÎó´ÎÊý
@@ -61,18 +61,18 @@ message56=
message57=ÎļþÊý
message58=°æ±¾
message59=²Ù×÷ϵͳ
-message60=Ò»ÔÂ
-message61=¶þÔÂ
-message62=ÈýÔÂ
-message63=ËÄÔÂ
-message64=ÎåÔÂ
-message65=ÁùÔÂ
-message66=ÆßÔÂ
-message67=°ËÔÂ
-message68=¾ÅÔÂ
-message69=Ê®ÔÂ
-message70=ʮһÔÂ
-message71=Ê®¶þÔÂ
+message60=1 ÔÂ
+message61=2 ÔÂ
+message62=3 ÔÂ
+message63=4 ÔÂ
+message64=5 ÔÂ
+message65=6 ÔÂ
+message66=7 ÔÂ
+message67=8 ÔÂ
+message68=9 ÔÂ
+message69=10ÔÂ
+message70=11ÔÂ
+message71=12ÔÂ
message72=ä¯ÀÀÆ÷ͳ¼Æ
message73=ÎļþÀà±ð
message74=Á¢¼´¸üÐÂ
@@ -80,7 +80,7 @@ message75=
message76=»Øµ½Ö÷Ò³
message77=ǰ
message78=yyyyÄê mmm ddÈÕ HH:MM
-message79=¹ýÂË
+message79=¹ýÂ˰üº¬
message80=È«²¿Áгö
message81=Ö÷»ú
message82=¸ö½âÒë³É¹¦
@@ -92,7 +92,7 @@ message87=
message88=ËÄ
message89=Îå
message90=Áù
-message91=°´ÐÇÆÚ¼¸
+message91=°´ÐÇÆÚ
message92=°´²Î¹ÛÕß
message93=°´²Î¹Ûʱ¼ä
message94=¼ø±ð³öµÄÓû§
@@ -154,7 +154,7 @@ message149=
message150=´óС
message151=µÚÒ»¸ö
message152=×îĩһ¸ö
-message153=¹ýÂËÆ÷
+message153=¹ýÂ˲»°üº¬
message154=·Çä¯ÀÀÆ÷²úÉúµÄÁ÷Á¿£¨À´×ÔËÑË÷ÒýÇæ»úÆ÷ÈË£¬²¡¶¾È䳿µÈ£©
message155=¼¯Èº
message156=ÒÔÉÏÁгöµÄËÑË÷ÒýÇæ»úÆ÷È˲úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûͼ±íÖÐ
@@ -166,3 +166,11 @@ message161=
message162=°´ÔÂÀúʷͳ¼Æ
message163=È䳿
message164=²»Í¬µÄÈ䳿
+message165=³É¹¦·¢ËÍÓʼþ
+message166=Óʼþʧ°Ü»ò¾ÜÊÕ
+message167=Ãô¸ÐÄ¿±ê
+message168=Javascript½ûÓÃ
+message169=´´½¨Õß
+message170=²å¼þ
+message171=µØÇø
+message172=³ÇÊÐ
diff --git a/wwwroot/cgi-bin/lib/browsers.pm b/wwwroot/cgi-bin/lib/browsers.pm
index d21de94c..5e618f33 100644
--- a/wwwroot/cgi-bin/lib/browsers.pm
+++ b/wwwroot/cgi-bin/lib/browsers.pm
@@ -16,7 +16,6 @@
# Gregarius devlog.gregarius.net/docs/ua (rss)
# hatena rss http://r.hatena.ne.jp/ (rss)
# Liferea http://liferea.sourceforge.net/ (rss)
-# lilina http://lilina.sourceforge.net/ (rss)
# PubSub-RSS-Reader http://www.pubsub.com/ (rss)
# 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html
# Potu Rss-Reader http://www.potu.com/
@@ -130,14 +129,13 @@
'hatena_rss',
'jetbrains_omea',
'liferea',
-'lilina',
'netnewswire',
'newsfire',
'newsgator',
'newzcrawler',
'plagger',
'pluck',
-'potu_rss\-reader',
+'potu',
'pubsub\-rss\-reader',
'pulpfiction',
'rssbandit',
@@ -315,14 +313,13 @@
'hatena_rss','Hatena (RSS Reader)',
'jetbrains_omea', 'Omea (RSS Reader)',
'liferea','Liferea (RSS Reader)',
-'lilina','lilina (RSS Reader)',
'netnewswire', 'NetNewsWire (RSS Reader)',
'newsfire', 'NewsFire (RSS Reader)',
'newsgator', 'NewsGator (RSS Reader)',
'newzcrawler', 'NewzCrawler (RSS Reader)',
'plagger', 'Plagger (RSS Reader)',
'pluck', 'Pluck (RSS Reader)',
-'potu_rss\-reader','Potu (RSS Reader)',
+'potu','Potu (RSS Reader)',
'pubsub\-rss\-reader','PubSub (RSS Reader)',
'pulpfiction', 'PulpFiction (RSS Reader)',
'rssbandit', 'RSS Bandit (RSS Reader)',
@@ -528,14 +525,13 @@
'hatena_rss', 'rss',
'jetbrains_omea', 'rss',
'liferea', 'rss',
-'lilina', 'rss',
'netnewswire', 'rss',
'newsfire', 'rss',
'newsgator', 'rss',
'newzcrawler', 'rss',
'plagger', 'rss',
'pluck', 'rss',
-'potu_rss\-reader', 'rss',
+'potu', 'rss',
'pubsub\-rss\-reader', 'rss',
'pulpfiction', 'rss',
'rssbandit', 'rss',
diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm
index e26d22f1..a90ac17f 100644
--- a/wwwroot/cgi-bin/lib/robots.pm
+++ b/wwwroot/cgi-bin/lib/robots.pm
@@ -97,7 +97,6 @@
# added nicebot http://www.egghelp.org/setup.htm ?
# added Snappy http://www.urltrends.com/faq.php
# added sohu agent
-# added TencentTraveler
# added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
# added zspider http://feedback.redkolibri.com/
# 2006-01-13
@@ -177,8 +176,6 @@
# added oBot http://www.webmasterworld.com/forum11/1616.htm
# added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds.
# added ping.blo.gs http://blo.gs/ping.php blog bot
-# added sogou spider http://corp.sohu.com/20051130/n240842344.shtml
-# added sogou test http://corp.sohu.com/20051130/n240842344.shtml
# added Sphere Scout http://www.sphere.com/
# added sproose crawler http://www.sproose.com/bot.html
# added SyndicAPI http://syndicapi.com/bot.html
@@ -342,6 +339,7 @@
'architext',
'jeeves',
'bjaaland',
+'contentmatch',
'ferret',
'googlebot',
'google-sitemaps',
@@ -350,6 +348,7 @@
'harvest',
'htdig',
'linkwalker',
+'lilina',
'lycos_',
'moget',
'muscatferret',
@@ -446,6 +445,7 @@
'esther',
'evliyacelebi',
'fastcrawler',
+'feedcrawl',
'fdse',
'felix',
'fetchrover',
@@ -462,6 +462,7 @@
'getbot',
'geturl',
'golem',
+'gougou',
'grapnel',
'griffon',
'gromit',
@@ -472,6 +473,9 @@
'htmlgobble',
'hyperdecontextualizer',
'iajabot',
+'iaskspider',
+'hl_ftien_spider',
+'sogou',
'iconoclast',
'ilse',
'imagelock',
@@ -596,6 +600,7 @@
'sven',
'tach_bw',
'tagyu\sagent',
+'tailrank',
'tarantula',
'tarspider',
'techbot',
@@ -756,6 +761,7 @@
'feedfetcher\-google',
'feedflow',
'feedster',
+'feedsky',
'feedvalidator',
'filmkamerabot',
'findlinks',
@@ -834,7 +840,6 @@
'mizzu\slabs',
'mj12bot',
'mojeekbot',
-'tencenttraveler', # Must be before msiecrawler
'msiecrawler',
'ms\ssearch\s4\.0\srobot',
'msrabot',
@@ -862,7 +867,8 @@
'openwebspider',
'oracle\sultra\ssearch',
'orbiter',
-'outfoxbot',
+'yodaobot',
+'qihoobot',
'passwordmaker\.org',
'pear\shttp\srequest\sclass',
'peerbot',
@@ -898,8 +904,6 @@
'shoutcast',
'slysearch',
'snap\.com\sbeta\scrawler',
-'sogou\sspider',
-'sogou\stest',
'sohu\-search',
'sohu', # "sohu agent"
'snappy',
@@ -925,6 +929,7 @@
'turtlescanner', # Must be before turtle
'turtle',
'tutorgigbot',
+'twiceler',
'ubicrawler',
'ultraseek',
'unchaos\sbot\shybrid\sweb\ssearch\sengine',
@@ -973,6 +978,7 @@
'yooglifetchagent',
'z\-add\slink\schecker',
'zealbot',
+'zhuaxia',
'zspider',
'zeus',
'ng\/1\.', # put at end to avoid false positive
@@ -1000,6 +1006,7 @@
'jeeves','Ask',
'bjaaland','Bjaaland',
'ferret','Wild Ferret Web Hopper #1, #2, #3',
+'contentmatch','Yahoo!China ContentMatch Crawler',
'googlebot','Googlebot',
'google-sitemaps', 'Google Sitemaps',
'gulliver','Northern Light Gulliver',
@@ -1007,6 +1014,7 @@
'harvest','Harvest',
'htdig','ht://Dig',
'linkwalker','LinkWalker',
+'lilina','Lilina',
'lycos_','Lycos',
'moget','moget',
'muscatferret','Muscat Ferret',
@@ -1100,6 +1108,7 @@
'esther','Esther',
'evliyacelebi','Evliya Celebi',
'fastcrawler','FastCrawler',
+'feedcrawl','FeedCrawl by feed@aobo.com',
'fdse','Fluid Dynamics Search Engine robot',
'felix','Felix IDE',
'fetchrover','FetchRover',
@@ -1116,6 +1125,7 @@
'getbot','GetBot',
'geturl','GetURL',
'golem','Golem',
+'gougou','GouGou',
'grapnel','Grapnel/0.01 Experiment',
'griffon','Griffon',
'gromit','Gromit',
@@ -1126,6 +1136,9 @@
'htmlgobble','HTMLgobble',
'hyperdecontextualizer','Hyper-Decontextualizer',
'iajabot','iajaBot',
+'iaskspider','Sina Iask Spider',
+'hl_ftien_spider','Hylanda',
+'sogou','Sogou Spider',
'iconoclast','Popular Iconoclast',
'ilse','Ingrid',
'imagelock','Imagelock',
@@ -1238,11 +1251,11 @@
'smartspider','Smart Spider',
'snooper','Snooper',
'solbot','Solbot',
-'speedy','Speedy Spider',
+'speedy','Speedy Spider',
'spider_monkey','spider_monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
-'spiderman','SpiderMan',
+'spiderman','Spiderman',
'spiderview','SpiderView(tm)',
'spry','Spry Wizard Robot',
'ssearcher','Site Searcher',
@@ -1255,6 +1268,7 @@
'tagyu\sagent','Tagyu Agent',
'tarantula','Tarantula',
'tarspider','tarspider',
+'tailrank','TailRank',
'techbot','TechBOT',
'templeton','Templeton',
'titan','TITAN',
@@ -1388,6 +1402,7 @@
'feedfetcher\-google','Feedfetcher-Google',
'feedflow','FeedFlow',
'feedster','Feedster',
+'feedsky','FeedSky',
'feedvalidator','FeedValidator',
'filmkamerabot','FilmkameraBot',
'findexa\scrawler','Findexa Crawler',
@@ -1463,7 +1478,6 @@
'mizzu\slabs','Mizzu Labs',
'mj12bot','MJ12bot',
'mojeekbot','MojeekBot',
-'tencenttraveler','TencentTraveler', # Must be before msiecrawler.
'msiecrawler','MSIECrawler',
'ms\ssearch\s4\.0\srobot','MS SharePoint Portal Server - MS Search 4.0 Robot',
'msrabot','msrabot',
@@ -1491,7 +1505,8 @@
'openwebspider','OpenWebSpider',
'oracle\sultra\ssearch','Oracle Ultra Search',
'orbiter','Orbiter',
-'outfoxbot','OutfoxBot',
+'yodaobot','OutfoxBot/YodaoBot',
+'qihoobot','QihooBot',
'passwordmaker\.org','passwordmaker.org',
'pear\shttp\srequest\sclass','PEAR HTTP Request class',
'peerbot','PEERbot',
@@ -1528,8 +1543,6 @@
'shoutcast','Shoutcast Directory Service',
'slysearch','SlySearch',
'snap\.com\sbeta\scrawler','snap.com beta crawler',
-'sogou\sspider','sogou spider',
-'sogou\stest','sogou test',
'sohu\-search','sohu-search',
'sohu','sohu agent',
'snappy','Snappy',
@@ -1555,6 +1568,7 @@
'turtle', 'Turtle',
'turtlescanner', 'Turtle',
'tutorgigbot','TutorGigBot',
+'twiceler','twiceler',
'ubicrawler','UbiCrawler',
'ultraseek', 'Ultraseek',
'unchaos\sbot\shybrid\sweb\ssearch\sengine','UnChaos Bot Hybrid Web Search Engine',
@@ -1603,6 +1617,7 @@
'yooglifetchagent','yoogliFetchAgent',
'z\-add\slink\schecker','Z-Add Link Checker',
'zealbot','ZealBot',
+'zhuaxia','ZhuaXia',
'zspider','zspider',
'zeus','Zeus Webster Pro',
'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive
diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm
index f8b24f38..86fb9507 100644
--- a/wwwroot/cgi-bin/lib/search_engines.pm
+++ b/wwwroot/cgi-bin/lib/search_engines.pm
@@ -285,7 +285,15 @@
# Minor brazilian search engines
'engine\.exe', 'miner\.bol\.com\.br',
# Minor chinese search engines
-'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com',
+'\.baidu\.com', # baidu search portal
+'\.vnet\.cn', # powered by MSN
+'\.soso\.com', # powered by Google
+'\.sogou\.com', # powered by Sohu
+'\.3721\.com', # powered by Yahoo!
+'iask\.com', # powered by Sina
+'\.accoona\.com', # Accoona
+'\.163\.com', # powered by Google
+'\.zhongsou\.com', # zhongsou search portal
# Minor czech search engines
'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz',
# Minor danish search-engines
@@ -491,10 +499,15 @@
'engine\.exe','engine',
'miner\.bol\.com\.br','miner',
# Minor chinese search engines
-'baidu\.com','baidu',
-'search\.sina\.com','sina',
-'search\.sohu\.com','sohu',
-'sogou\.com','sogou',
+'\.baidu\.com','baidu',
+'iask\.com','iask',
+'\.accoona\.com','accoona',
+'\.3721\.com','3721',
+'\.163\.com','netease',
+'\.soso\.com','soso',
+'\.zhongsou\.com','zhongsou',
+'\.vnet\.cn','vnet',
+'\.sogou\.com','sogou',
# Minor czech search engines
'atlas\.cz','atlas',
'seznam\.cz','seznam',
@@ -750,7 +763,15 @@
# Minor brazilian search engines
'engine','p1=', 'miner','q=',
# Minor chinese search engines
-'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=',
+'baidu','(wd|word)=',
+'iask','(w|k)=',
+'accoona','qt=',
+'3721','(p|name)=',
+'netease','q=',
+'soso','q=',
+'zhongsou','(word|w)=',
+'sogou', 'query=',
+'vnet','kw=',
# Minor czech search engines
'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=',
# Minor danish search engines
@@ -834,7 +855,7 @@
# If no rules are known, WordsToExtractSearchUrl will be used to search keyword parameter
# If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters.
#------------------------------------------------------------------------------
-@WordsToExtractSearchUrl= ('ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w=');
+@WordsToExtractSearchUrl= ('tn=','ie=','ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w=');
@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');
# SearchEnginesKnownUTFCoding
@@ -969,7 +990,15 @@
# Minor brazilian search engines
'engine','Cade', 'miner','Meta Miner',
# Minor chinese search engines
-'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', 'sogou','Sogou',
+'baidu','Baidu',
+'iask','Iask',
+'accoona','Accoona',
+'3721','3721',
+'netease', 'NetEase',
+'soso','SoSo',
+'zhongsou','ZhongSou',
+'sogou', 'SoGou',
+'vnet','VNet',
# Minor czech search engines
'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz',
# Minor danish search-engines
@@ -1082,4 +1111,4 @@
#}
#print @SearchEnginesSearchIDOrder_list1." ".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen;
-1;
\ No newline at end of file
+1;