From: eldy <> Date: Fri, 10 Aug 2007 21:01:42 +0000 (+0000) Subject: AWStats - Bugs - 6 items X-Git-Tag: AWSTATS_6_8_BETA~58 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=89ca43ca753717c32a96f6fc536a4c457dcf041b;p=thirdparty%2FAWStats.git AWStats - Bugs - 6 items 775988 The latest: minor Chinese search engine patch 1735647 Chinese search engines for awstats 6.6 1735646 robots patch: feedsky, contentmatch crawler, twiceler, yodao 1735639 Browser patch for Lilina/potu reader 1735637 Chinese translation file for awstats 6.6 1533028 WordToCleanSearchUrl for baidu.com AWStats - Patches - 6 items 1384243 minor Chinese spider and search engine patch 1569151 TOP 8 Chinese local search engines 745359 Chinese(Simp) update: 6.5 awstats-cn.txt 1569201 top Chinese browser and robot update: TT is not a robot 1569229 Simplified Chinese language file update 1569208 Browser update on potu rss reader and lilina rss reader AWStats - Feature Requests - 5 items 1384245 exclude referer spam by request type is HEAD 881891 ShowMonthStats: last 12 months vs first month of this year 863298 Counter of spider access on URL-Pages report. 
1124526 Meet referer spam: need nofollow 706297 smart IIS timezone solution --- diff --git a/wwwroot/cgi-bin/lang/awstats-cn.txt b/wwwroot/cgi-bin/lang/awstats-cn.txt index 10484157..8b94a237 100644 --- a/wwwroot/cgi-bin/lang/awstats-cn.txt +++ b/wwwroot/cgi-bin/lang/awstats-cn.txt @@ -1,11 +1,11 @@ -# Chinese (simplified) message file +# Chinese (simplified) message file (by Che Dong chedongATgmail.com) # $Revision$ - $Date$ PageCode=gb2312 message0=ÎÞ·¨µÃÖª message1=ÎÞ·¨µÃÖª(²»ÄÜ·´½âÍøÓòÃû³Æ) message2=ÆäËû message3=²é¿´Ïêϸ×ÊÁÏ -message4=ÐÇÆÚ +message4=ÈÕÆÚ message5=Ô message6=Äê message7=ͳ¼ÆÍøÕ¾ @@ -25,7 +25,7 @@ message20=ÿСʱ message21=ä¯ÀÀÆ÷ message22=HTTP ´íÎó message23=·´ÏàÁ´½Ó -message24=´Óδ¸üР+message24=´Óδ¸üУ¨²Î¿¼ awstats_setup.htmlÉ쵀 'Build/Update'£© message25=²Î¹ÛÕßµÄÍøÓò»ò¹ú¼Ò message26=Ö÷»úÊý message27=ÍøÒ³Êý @@ -47,7 +47,7 @@ message42= message43=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö¾ä message44=ÍøÕ¾ËÑË÷µÄ¹Ø¼ü×Ö´Ê message45=ÎÞ·¨·´½âÒëµÄIPµØÖ· -message46=ÎÞ·¨µÃÖªµÄ×÷ҵϵͳ +message46=ÎÞ·¨µÃÖªµÄ²Ù×÷ϵͳ message47=ÕÒ²»µ½µÄÍøÖ·Á¬½á (HTTP ´íÎóÂë 404) message48=IP µØÖ· message49=´íÎó´ÎÊý @@ -61,18 +61,18 @@ message56= message57=ÎļþÊý message58=°æ±¾ message59=²Ù×÷ϵͳ -message60=Ò»Ô -message61=¶þÔ -message62=ÈýÔ -message63=ËÄÔ -message64=ÎåÔ -message65=ÁùÔ -message66=ÆßÔ -message67=°ËÔ -message68=¾ÅÔ -message69=ʮԠ-message70=ʮһÔ -message71=Ê®¶þÔ +message60=1 Ô +message61=2 Ô +message62=3 Ô +message63=4 Ô +message64=5 Ô +message65=6 Ô +message66=7 Ô +message67=8 Ô +message68=9 Ô +message69=10Ô +message70=11Ô +message71=12Ô message72=ä¯ÀÀÆ÷ͳ¼Æ message73=ÎļþÀà±ð message74=Á¢¼´¸üР@@ -80,7 +80,7 @@ message75= message76=»Øµ½Ö÷Ò³ message77=ǰ message78=yyyyÄê mmm ddÈÕ HH:MM -message79=¹ýÂË +message79=¹ýÂ˰üº¬ message80=È«²¿Áгö message81=Ö÷»ú message82=¸ö½âÒë³É¹¦ @@ -92,7 +92,7 @@ message87= message88=ËÄ message89=Îå message90=Áù -message91=°´ÐÇÆÚ¼¸ +message91=°´ÐÇÆÚ message92=°´²Î¹ÛÕß message93=°´²Î¹Ûʱ¼ä message94=¼ø±ð³öµÄÓû§ @@ -154,7 +154,7 @@ message149= message150=´óС message151=µÚÒ»¸ö 
message152=×îĩһ¸ö -message153=¹ýÂËÆ÷ +message153=¹ýÂ˲»°üº¬ message154=·Çä¯ÀÀÆ÷²úÉúµÄÁ÷Á¿£¨À´×ÔËÑË÷ÒýÇæ»úÆ÷ÈË£¬²¡¶¾È䳿µÈ£© message155=¼¯Èº message156=ÒÔÉÏÁгöµÄËÑË÷ÒýÇæ»úÆ÷È˲úÉúµÄ¡°·Çä¯ÀÀÆ÷¡±Á÷Á¿²¢Î´°üº¬ÔÚÆäËûͼ±íÖÐ @@ -166,3 +166,11 @@ message161= message162=°´ÔÂÀúʷͳ¼Æ message163=È䳿 message164=²»Í¬µÄÈ䳿 +message165=³É¹¦·¢ËÍÓʼþ +message166=Óʼþʧ°Ü»ò¾ÜÊÕ +message167=Ãô¸ÐÄ¿±ê +message168=Javascript½ûÓà +message169=´´½¨Õß +message170=²å¼þ +message171=µØÇø +message172=³ÇÊÐ diff --git a/wwwroot/cgi-bin/lib/browsers.pm b/wwwroot/cgi-bin/lib/browsers.pm index d21de94c..5e618f33 100644 --- a/wwwroot/cgi-bin/lib/browsers.pm +++ b/wwwroot/cgi-bin/lib/browsers.pm @@ -16,7 +16,6 @@ # Gregarius devlog.gregarius.net/docs/ua (rss) # hatena rss http://r.hatena.ne.jp/ (rss) # Liferea http://liferea.sourceforge.net/ (rss) -# lilina http://lilina.sourceforge.net/ (rss) # PubSub-RSS-Reader http://www.pubsub.com/ (rss) # 2006-05-20 Sean Carlos http://www.antezeta.com/awstats.html # Potu Rss-Reader http://www.potu.com/ @@ -130,14 +129,13 @@ 'hatena_rss', 'jetbrains_omea', 'liferea', -'lilina', 'netnewswire', 'newsfire', 'newsgator', 'newzcrawler', 'plagger', 'pluck', -'potu_rss\-reader', +'potu', 'pubsub\-rss\-reader', 'pulpfiction', 'rssbandit', @@ -315,14 +313,13 @@ 'hatena_rss','Hatena (RSS Reader)', 'jetbrains_omea', 'Omea (RSS Reader)', 'liferea','Liferea (RSS Reader)', -'lilina','lilina (RSS Reader)', 'netnewswire', 'NetNewsWire (RSS Reader)', 'newsfire', 'NewsFire (RSS Reader)', 'newsgator', 'NewsGator (RSS Reader)', 'newzcrawler', 'NewzCrawler (RSS Reader)', 'plagger', 'Plagger (RSS Reader)', 'pluck', 'Pluck (RSS Reader)', -'potu_rss\-reader','Potu (RSS Reader)', +'potu','Potu (RSS Reader)', 'pubsub\-rss\-reader','PubSub (RSS Reader)', 'pulpfiction', 'PulpFiction (RSS Reader)', 'rssbandit', 'RSS Bandit (RSS Reader)', @@ -528,14 +525,13 @@ 'hatena_rss', 'rss', 'jetbrains_omea', 'rss', 'liferea', 'rss', -'lilina', 'rss', 'netnewswire', 'rss', 'newsfire', 'rss', 'newsgator', 'rss', 
'newzcrawler', 'rss', 'plagger', 'rss', 'pluck', 'rss', -'potu_rss\-reader', 'rss', +'potu', 'rss', 'pubsub\-rss\-reader', 'rss', 'pulpfiction', 'rss', 'rssbandit', 'rss', diff --git a/wwwroot/cgi-bin/lib/robots.pm b/wwwroot/cgi-bin/lib/robots.pm index e26d22f1..a90ac17f 100644 --- a/wwwroot/cgi-bin/lib/robots.pm +++ b/wwwroot/cgi-bin/lib/robots.pm @@ -97,7 +97,6 @@ # added nicebot http://www.egghelp.org/setup.htm ? # added Snappy http://www.urltrends.com/faq.php # added sohu agent -# added TencentTraveler # added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net] # added zspider http://feedback.redkolibri.com/ # 2006-01-13 @@ -177,8 +176,6 @@ # added oBot http://www.webmasterworld.com/forum11/1616.htm # added Onfolio http://www.onfolio.com/ (IE Toolbar plugin) - hit rss feeds. # added ping.blo.gs http://blo.gs/ping.php blog bot -# added sogou spider http://corp.sohu.com/20051130/n240842344.shtml -# added sogou test http://corp.sohu.com/20051130/n240842344.shtml # added Sphere Scout http://www.sphere.com/ # added sproose crawler http://www.sproose.com/bot.html # added SyndicAPI http://syndicapi.com/bot.html @@ -342,6 +339,7 @@ 'architext', 'jeeves', 'bjaaland', +'contentmatch', 'ferret', 'googlebot', 'google-sitemaps', @@ -350,6 +348,7 @@ 'harvest', 'htdig', 'linkwalker', +'lilina', 'lycos_', 'moget', 'muscatferret', @@ -446,6 +445,7 @@ 'esther', 'evliyacelebi', 'fastcrawler', +'feedcrawl', 'fdse', 'felix', 'fetchrover', @@ -462,6 +462,7 @@ 'getbot', 'geturl', 'golem', +'gougou', 'grapnel', 'griffon', 'gromit', @@ -472,6 +473,9 @@ 'htmlgobble', 'hyperdecontextualizer', 'iajabot', +'iaskspider', +'hl_ftien_spider', +'sogou', 'iconoclast', 'ilse', 'imagelock', @@ -596,6 +600,7 @@ 'sven', 'tach_bw', 'tagyu\sagent', +'tailrank', 'tarantula', 'tarspider', 'techbot', @@ -756,6 +761,7 @@ 'feedfetcher\-google', 'feedflow', 'feedster', +'feedsky', 'feedvalidator', 'filmkamerabot', 'findlinks', @@ -834,7 +840,6 @@ 'mizzu\slabs', 'mj12bot', 
'mojeekbot', -'tencenttraveler', # Must be before msiecrawler 'msiecrawler', 'ms\ssearch\s4\.0\srobot', 'msrabot', @@ -862,7 +867,8 @@ 'openwebspider', 'oracle\sultra\ssearch', 'orbiter', -'outfoxbot', +'yodaobot', +'qihoobot', 'passwordmaker\.org', 'pear\shttp\srequest\sclass', 'peerbot', @@ -898,8 +904,6 @@ 'shoutcast', 'slysearch', 'snap\.com\sbeta\scrawler', -'sogou\sspider', -'sogou\stest', 'sohu\-search', 'sohu', # "sohu agent" 'snappy', @@ -925,6 +929,7 @@ 'turtlescanner', # Must be before turtle 'turtle', 'tutorgigbot', +'twiceler', 'ubicrawler', 'ultraseek', 'unchaos\sbot\shybrid\sweb\ssearch\sengine', @@ -973,6 +978,7 @@ 'yooglifetchagent', 'z\-add\slink\schecker', 'zealbot', +'zhuaxia', 'zspider', 'zeus', 'ng\/1\.', # put at end to avoid false positive @@ -1000,6 +1006,7 @@ 'jeeves','Ask', 'bjaaland','Bjaaland', 'ferret','Wild Ferret Web Hopper #1, #2, #3', +'contentmatch','Yahoo!China ContentMatch Crawler', 'googlebot','Googlebot', 'google-sitemaps', 'Google Sitemaps', 'gulliver','Northern Light Gulliver', @@ -1007,6 +1014,7 @@ 'harvest','Harvest', 'htdig','ht://Dig', 'linkwalker','LinkWalker', +'lilina','Lilina', 'lycos_','Lycos', 'moget','moget', 'muscatferret','Muscat Ferret', @@ -1100,6 +1108,7 @@ 'esther','Esther', 'evliyacelebi','Evliya Celebi', 'fastcrawler','FastCrawler', +'feedcrawl','FeedCrawl by feed@aobo.com', 'fdse','Fluid Dynamics Search Engine robot', 'felix','Felix IDE', 'fetchrover','FetchRover', @@ -1116,6 +1125,7 @@ 'getbot','GetBot', 'geturl','GetURL', 'golem','Golem', +'gougou','GouGou', 'grapnel','Grapnel/0.01 Experiment', 'griffon','Griffon', 'gromit','Gromit', @@ -1126,6 +1136,9 @@ 'htmlgobble','HTMLgobble', 'hyperdecontextualizer','Hyper-Decontextualizer', 'iajabot','iajaBot', +'iaskspider','Sina Iask Spider', +'hl_ftien_spider','Hylanda', +'sogou','Sogou Spider', 'iconoclast','Popular Iconoclast', 'ilse','Ingrid', 'imagelock','Imagelock', @@ -1238,11 +1251,11 @@ 'smartspider','Smart Spider', 'snooper','Snooper', 
'solbot','Solbot', -'speedy','Speedy Spider', +'speedy','Speedy Spider', 'spider_monkey','spider_monkey', 'spiderbot','SpiderBot', 'spiderline','Spiderline Crawler', -'spiderman','SpiderMan', +'spiderman','Spiderman', 'spiderview','SpiderView(tm)', 'spry','Spry Wizard Robot', 'ssearcher','Site Searcher', @@ -1255,6 +1268,7 @@ 'tagyu\sagent','Tagyu Agent', 'tarantula','Tarantula', 'tarspider','tarspider', +'tailrank','TailRank', 'techbot','TechBOT', 'templeton','Templeton', 'titan','TITAN', @@ -1388,6 +1402,7 @@ 'feedfetcher\-google','Feedfetcher-Google', 'feedflow','FeedFlow', 'feedster','Feedster', +'feedsky','FeedSky', 'feedvalidator','FeedValidator', 'filmkamerabot','FilmkameraBot', 'findexa\scrawler','Findexa Crawler', @@ -1463,7 +1478,6 @@ 'mizzu\slabs','Mizzu Labs', 'mj12bot','MJ12bot', 'mojeekbot','MojeekBot', -'tencenttraveler','TencentTraveler', # Must be before msiecrawler. 'msiecrawler','MSIECrawler', 'ms\ssearch\s4\.0\srobot','MS SharePoint Portal Server - MS Search 4.0 Robot', 'msrabot','msrabot', @@ -1491,7 +1505,8 @@ 'openwebspider','OpenWebSpider', 'oracle\sultra\ssearch','Oracle Ultra Search', 'orbiter','Orbiter', -'outfoxbot','OutfoxBot', +'yodaobot','OutfoxBot/YodaoBot', +'qihoobot','QihooBot', 'passwordmaker\.org','passwordmaker.org', 'pear\shttp\srequest\sclass','PEAR HTTP Request class', 'peerbot','PEERbot', @@ -1528,8 +1543,6 @@ 'shoutcast','Shoutcast Directory Service', 'slysearch','SlySearch', 'snap\.com\sbeta\scrawler','snap.com beta crawler', -'sogou\sspider','sogou spider', -'sogou\stest','sogou test', 'sohu\-search','sohu-search', 'sohu','sohu agent', 'snappy','Snappy', @@ -1555,6 +1568,7 @@ 'turtle', 'Turtle', 'turtlescanner', 'Turtle', 'tutorgigbot','TutorGigBot', +'twiceler','twiceler', 'ubicrawler','UbiCrawler', 'ultraseek', 'Ultraseek', 'unchaos\sbot\shybrid\sweb\ssearch\sengine','UnChaos Bot Hybrid Web Search Engine', @@ -1603,6 +1617,7 @@ 'yooglifetchagent','yoogliFetchAgent', 'z\-add\slink\schecker','Z-Add Link Checker', 
'zealbot','ZealBot', +'zhuaxia','ZhuaXia', 'zspider','zspider', 'zeus','Zeus Webster Pro', 'ng\/1\.','NG 1.x (Exalead)', # put at end to avoid false positive diff --git a/wwwroot/cgi-bin/lib/search_engines.pm b/wwwroot/cgi-bin/lib/search_engines.pm index f8b24f38..86fb9507 100644 --- a/wwwroot/cgi-bin/lib/search_engines.pm +++ b/wwwroot/cgi-bin/lib/search_engines.pm @@ -285,7 +285,15 @@ # Minor brazilian search engines 'engine\.exe', 'miner\.bol\.com\.br', # Minor chinese search engines -'baidu\.com','search\.sina\.com','search\.sohu\.com', 'sogou\.com', +'\.baidu\.com', # baidu search portal +'\.vnet\.cn', # powered by MSN +'\.soso\.com', # powered by Google +'\.sogou\.com', # powered by Sohu +'\.3721\.com', # powered by Yahoo! +'iask\.com', # powered by Sina +'\.accoona\.com', # Accoona +'\.163\.com', # powered by Google +'\.zhongsou\.com', # zhongsou search portal # Minor czech search engines 'atlas\.cz','seznam\.cz','quick\.cz','centrum\.cz','jyxo\.(cz|com)','najdi\.to','redbox\.cz', # Minor danish search-engines @@ -491,10 +499,15 @@ 'engine\.exe','engine', 'miner\.bol\.com\.br','miner', # Minor chinese search engines -'baidu\.com','baidu', -'search\.sina\.com','sina', -'search\.sohu\.com','sohu', -'sogou\.com','sogou', +'\.baidu\.com','baidu', +'iask\.com','iask', +'\.accoona\.com','accoona', +'\.3721\.com','3721', +'\.163\.com','netease', +'\.soso\.com','soso', +'\.zhongsou\.com','zhongsou', +'\.vnet\.cn','vnet', +'\.sogou\.com','sogou', # Minor czech search engines 'atlas\.cz','atlas', 'seznam\.cz','seznam', @@ -750,7 +763,15 @@ # Minor brazilian search engines 'engine','p1=', 'miner','q=', # Minor chinese search engines -'baidu','word=', 'sina', 'word=', 'sohu','word=', 'sogou', 'query=', +'baidu','(wd|word)=', +'iask','(w|k)=', +'accoona','qt=', +'3721','(p|name)=', +'netease','q=', +'soso','q=', +'zhongsou','(word|w)=', +'sogou', 'query=', +'vnet','kw=', # Minor czech search engines 'atlas','searchtext=', 'seznam','w=', 'quick','query=', 'centrum','q=', 
'jyxo','s=', 'najdi','dotaz=', 'redbox','srch=', # Minor danish search engines @@ -834,7 +855,7 @@ # If no rules are known, WordsToExtractSearchUrl will be used to search keyword parameter # If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters. #------------------------------------------------------------------------------ -@WordsToExtractSearchUrl= ('ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w='); +@WordsToExtractSearchUrl= ('tn=','ie=','ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w='); @WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look='); # SearchEnginesKnownUTFCoding @@ -969,7 +990,15 @@ # Minor brazilian search engines 'engine','Cade', 'miner','Meta Miner', # Minor chinese search engines -'baidu','Baidu', 'sina','Sina', 'sohu','Sohu', 'sogou','Sogou', +'baidu','Baidu', +'iask','Iask', +'accoona','Accoona', +'3721','3721', +'netease', 'NetEase', +'soso','SoSo', +'zhongsou','ZhongSou', +'sogou', 'SoGou', +'vnet','VNet', # Minor czech search engines 'atlas','Atlas.cz', 'seznam','Seznam', 'quick','Quick.cz', 'centrum','Centrum.cz', 'jyxo','Jyxo.cz', 'najdi','Najdi.to', 'redbox','RedBox.cz', # Minor danish search-engines @@ -1082,4 +1111,4 @@ #} #print @SearchEnginesSearchIDOrder_list1." 
".@SearchEnginesSearchIDOrder_list2." ".@SearchEnginesSearchIDOrder_listgen; -1; \ No newline at end of file +1;