From: eldy <> Date: Sat, 6 Dec 2003 21:29:14 +0000 (+0000) Subject: Added decodeUTFkeys plugin to AWStats to show correctly (in language charset) keyword... X-Git-Tag: AWSTATS_6_0_BETA~30 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ee722982edd5b4700a0a3cba809b3817c795b140;p=thirdparty%2FAWStats.git Added decodeUTFkeys plugin to AWStats to show correctly (in language charset) keywords/keyphrases strings even if they were UTF8 coded by the referer search engine. Fixed not working link for search keywords/keyphrase in menu with FireBird. --- diff --git a/docs/awstats_changelog.txt b/docs/awstats_changelog.txt index f0f54cf2..7ede679a 100644 --- a/docs/awstats_changelog.txt +++ b/docs/awstats_changelog.txt @@ -30,6 +30,7 @@ Fixes: - Click on "Summary" now returns to top of page even with rawlog plugin. - Fixed AmigaVoyager detection. - Fixed bug in SkipHosts filter for mail log files. +- Fixed not working link for search keywords/keyphrase in menu with FireBird. New features/improvements: - Increased speed by 10 to 20%. @@ -61,6 +62,9 @@ New features/improvements: - Webmin module updated to 1.210 to integrate all new parameters. - Better setup error messages for newbie. - Reports look better on Mozilla browsers. +- Added decodeUTFkeys plugin to AWStats to show correctly (in language + charset) keywords/keyphrases strings even if they were UTF8 coded by + the referer search engine. - configure.pl: A new script to configure AWStats and Apache and build a simple config file. - awstats_buildstaticpages.pl: The -date option has been replaced @@ -82,7 +86,6 @@ Other/Documentation: - Documentation seriously updated. - FAQ updated. - Debian, Amigavoyager and some missing country flags icons added. -- Prepare code for decodeUTFkeys plugin. - Added Hebrew and Galician language. Note 1: When migrating to 6.x series, if you use the ExtraSections feature, diff --git a/wwwroot/cgi-bin/awstats.model.conf b/wwwroot/cgi-bin/awstats.model.conf index c9aef8d4..b31d3bda 100644 --- a/wwwroot/cgi-bin/awstats.model.conf +++ b/wwwroot/cgi-bin/awstats.model.conf @@ -1134,6 +1134,13 @@ color_x="C1B2E2" # Background color for number of exit pages (Default = "C1B2 # #LoadPlugin="tooltips" +# Plugin: DecodeUTFKeys +# Perl modules required: Encode and URI::Escape +# Allow AWStats to show correctly (in language charset) keywords/keyphrases +# strings even if they were UTF8 coded by the referer search engine. +# +#LoadPlugin="decodeutfkeys" + # Plugin: IPv6 # Perl modules required: Net::IP and Net::DNS # This plugin gives AWStats capability to make reverse DNS lookup on IPv6 diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl index f99470a6..560b5a19 100644 --- a/wwwroot/cgi-bin/awstats.pl +++ b/wwwroot/cgi-bin/awstats.pl @@ -910,7 +910,6 @@ sub SkipUserAgent { # Return: 0 Not found, 1 Found #------------------------------------------------------------------------------ sub SkipFile { - debug(" Call to SkipFile to check $_[0]"); foreach (@SkipFiles) { if ($_[0] =~ /$_/) { return 1; } } 0; # Not in @SkipFiles } @@ -1766,8 +1765,8 @@ sub Read_Plugins { my $pluginname=$1; if ($pluginname) { if (! $PluginsLoaded{'init'}{"$pluginname"}) { # Plugin not already loaded - my %pluginisfor=('tooltips'=>'o','ipv6'=>'u','hashfiles'=>'u','geoip'=>'u', - 'geoipfree'=>'u','hostinfo'=>'o','userinfo'=>'o','urlalias'=>'o','timehires'=>'u','timezone'=>'ou'); + my %pluginisfor=('ipv6'=>'u','hashfiles'=>'u','geoip'=>'u','geoipfree'=>'u','timehires'=>'u','timezone'=>'ou', + 'decodeutfkeys'=>'o','hostinfo'=>'o','userinfo'=>'o','urlalias'=>'o','tooltips'=>'o'); if ($pluginisfor{$pluginname}) { # Do not load "update plugins" if output only if (! $UpdateStats && scalar keys %HTMLOutput && $pluginisfor{$pluginname} !~ /o/) { $PluginsLoaded{'init'}{"$pluginname"}=1; next; } @@ -4015,39 +4014,14 @@ sub Init_HashArray { # Return: decodedstring #------------------------------------------------------------------------------ sub ChangeWordSeparatorsIntoSpace { - $_[0] =~ s/%1[03]/ /g; - $_[0] =~ s/%2[02789abc]/ /ig; - $_[0] =~ s/%3a/ /ig; + $_[0] =~ s/%1[03]/ /g; # LF,CR + $_[0] =~ s/%2[02789abc]/ /ig; # + $_[0] =~ s/%3a/ /ig; # : $_[0] =~ tr/\+\'\(\)\"\*,:/ /s; # "&" and "=" must not be in this list } #------------------------------------------------------------------------------ -# Function: Converts an UTF8 string to specified Charset -# Parameters: utfstringtodecode charsettoencode -# Return: newencodedstring -#------------------------------------------------------------------------------ -sub Utf8_To_Ascii { -#Function to prepare 'decodeUTFkeys' plugin -#use Encode; -#use URI::Escape; -#my $string = shift; -#my $encoding = shift; -#if ( $string =~ m/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf])*$/ ) -#{ -# $string = decode("utf-8", $string); -# $string = encode($encoding, $string); -#} -## trim space -#$string =~ s/^ +//; -#$string =~ s/ +$//; -## reverse "+", ";" to space -#$string =~ s/;+/\+/g; -#$string =~ s/\s+/\+/g; -#return $string; -} - -#------------------------------------------------------------------------------ -# Function: Encode URL to XML (Transforms & into & as needed in XML/XHTML) +# Function: Transforms & into & as needed in XML/XHTML # Parameters: stringtoencode # Return: encodedstring #------------------------------------------------------------------------------ @@ -6520,7 +6494,6 @@ if ($UpdateStats && $FrameName ne 'index' && $FrameName ne 'mainleft') { # Updat # Now param is keyphrase: "cache:mmm:www/zzz+aaa+bbb/ccc+ddd%20eee'fff,ggg" $param =~ s/^(cache|related):[^\+]+//; &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg] - # TODO Add a plugin to convert utf8 coded params (google, alltheweb) into locally coded string ($PageCode) $param =~ s/^ +//; $param =~ s/ +$//; $param =~ tr/ /\+/s; if ((length $param) > 0) { $_keyphrases{$param}++; } last; @@ -6539,7 +6512,6 @@ if ($UpdateStats && $FrameName ne 'index' && $FrameName ne 'mainleft') { # Updat # Now param is keyphrase: "aaa+bbb/ccc+ddd%20eee'fff,ggg" $param =~ s/^(cache|related):[^\+]+//; &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg ] - # TODO Add a plugin to convert utf8 coded params (google, alltheweb) into locally coded string ($PageCode) $param =~ s/^ +//; $param =~ s/ +$//; $param =~ tr/ /\+/s; if ((length $param) > 2) { $_keyphrases{$param}++; last; } } @@ -6863,7 +6835,7 @@ if (scalar keys %HTMLOutput) { # HTMLHeadSection if ($FrameName ne 'index' && $FrameName ne 'mainleft') { - print " \n\n"; + print " \n\n"; print "$HTMLHeadSection\n"; print "\n"; } @@ -6879,7 +6851,7 @@ if (scalar keys %HTMLOutput) { if ($ShowMenu || $FrameName eq 'mainleft') { if ($Debug) { debug("ShowMenu",2); } my $frame=($FrameName eq 'mainleft'); - print "$Center \n"; + print "$Center \n"; my $WIDTHMENU1=($FrameName eq 'mainleft'?$FRAMEWIDTH:150); @@ -7232,7 +7204,7 @@ if (scalar keys %HTMLOutput) { # if ($HTMLOutput{'alldays'}) { # if ($Debug) { debug("ShowMonthDayStats",2); } -# print "$Center 
\n"; +# print "$Center 
\n"; # &tab_head("$Message[5]",0,0,"alldays"); # # my $NewLinkParams=${QueryString}; @@ -8163,7 +8135,10 @@ if (scalar keys %HTMLOutput) { my $count=0; &BuildKeyList($MaxRowsInHTMLOutput,$MinHit{'Keyphrase'},\%_keyphrases,\%_keyphrases); foreach my $key (@keylist) { - my $mot = CleanFromCSSA(DecodeEncodedString($key)); + my $mot; + # Convert coded keywords (utf8,...) to be correctly reported in HTML page. + if ($PluginsLoaded{'DecodeKey'}{'decodeutfkeys'}) { $mot=CleanFromCSSA(DecodeKey_decodeutfkeys($key,$PageCode||'iso-8859-1')); } + else { $mot = CleanFromCSSA(DecodeEncodedString($key)); } my $p; if ($TotalKeyphrases) { $p=int($_keyphrases{$key}/$TotalKeyphrases*1000)/10; } print "".XMLEncode($mot)."$_keyphrases{$key}$p %\n"; @@ -8189,7 +8164,10 @@ if (scalar keys %HTMLOutput) { my $count=0; &BuildKeyList($MaxRowsInHTMLOutput,$MinHit{'Keyword'},\%_keywords,\%_keywords); foreach my $key (@keylist) { - my $mot = CleanFromCSSA(DecodeEncodedString($key)); + my $mot; + # Convert coded keywords (utf8,...) to be correctly reported in HTML page. + if ($PluginsLoaded{'DecodeKey'}{'decodeutfkeys'}) { $mot=CleanFromCSSA(DecodeKey_decodeutfkeys($key,$PageCode||'iso-8859-1')); } + else { $mot = CleanFromCSSA(DecodeEncodedString($key)); } my $p; if ($TotalKeywords) { $p=int($_keywords{$key}/$TotalKeywords*1000)/10; } print "".XMLEncode($mot)."$_keywords{$key}$p %\n"; @@ -8243,7 +8221,7 @@ if (scalar keys %HTMLOutput) { } if ($HTMLOutput{'info'}) { # Not yet available - print "$Center 
"; + print "$Center 
"; &html_end; } if ($HTMLOutput{'main'}) { @@ -8467,7 +8445,7 @@ if (scalar keys %HTMLOutput) { } } - print "\n \n\n"; + print "\n \n\n"; # BY DAY OF MONTH #--------------------------------------------------------------------- @@ -8909,7 +8887,7 @@ if (scalar keys %HTMLOutput) { &tab_end; } - print "\n \n\n"; + print "\n \n\n"; # BY COUNTRY/DOMAIN #--------------------------- @@ -9173,7 +9151,7 @@ if (scalar keys %HTMLOutput) { &tab_end("* $Message[158]"); } - print "\n \n\n"; + print "\n \n\n"; # BY SESSION #---------------------------- @@ -9289,7 +9267,7 @@ if (scalar keys %HTMLOutput) { #------------------------- if ($ShowPagesStats) { if ($Debug) { debug("ShowPagesStats (MaxNbOf{'PageShown'}=$MaxNbOf{'PageShown'} TotalDifferentPages=$TotalDifferentPages)",2); } - print "$Center   
\n"; + print "$Center   
\n"; my $title="$Message[19] ($Message[77] $MaxNbOf{'PageShown'})   -   $Message[80]"; if ($ShowPagesStats =~ /E/i) { $title.="   -   $Message[104]"; } if ($ShowPagesStats =~ /X/i) { $title.="   -   $Message[116]"; } @@ -9501,7 +9479,7 @@ if (scalar keys %HTMLOutput) { &tab_end; } - print "\n \n\n"; + print "\n \n\n"; # BY REFERENCE #--------------------------- @@ -9621,7 +9599,7 @@ if (scalar keys %HTMLOutput) { &tab_end; } - print "\n \n\n"; + print "\n \n\n"; # BY SEARCH KEYWORDS AND/OR KEYPHRASES #------------------------------------- @@ -9639,10 +9617,13 @@ if (scalar keys %HTMLOutput) { my $count=0; &BuildKeyList($MaxNbOf{'KeyphrasesShown'},$MinHit{'Keyphrase'},\%_keyphrases,\%_keyphrases); foreach my $key (@keylist) { - my $mot = CleanFromCSSA(DecodeEncodedString($key)); + my $mot; + # Convert coded keywords (utf8,...) to be correctly reported in HTML page. + if ($PluginsLoaded{'DecodeKey'}{'decodeutfkeys'}) { $mot=CleanFromCSSA(DecodeKey_decodeutfkeys($key,$PageCode||'iso-8859-1')); } + else { $mot = CleanFromCSSA(DecodeEncodedString($key)); } my $p; if ($TotalKeyphrases) { $p=int($_keyphrases{$key}/$TotalKeyphrases*1000)/10; } - print "$mot$_keyphrases{$key}$p %\n"; + print "".XMLEncode($mot)."$_keyphrases{$key}$p %\n"; $total_s += $_keyphrases{$key}; $count++; } @@ -9668,10 +9649,13 @@ if (scalar keys %HTMLOutput) { my $count=0; &BuildKeyList($MaxNbOf{'KeywordsShown'},$MinHit{'Keyword'},\%_keywords,\%_keywords); foreach my $key (@keylist) { - my $mot = CleanFromCSSA(DecodeEncodedString($key)); + my $mot; + # Convert coded keywords (utf8,...) to be correctly reported in HTML page. + if ($PluginsLoaded{'DecodeKey'}{'decodeutfkeys'}) { $mot=CleanFromCSSA(DecodeKey_decodeutfkeys($key,$PageCode||'iso-8859-1')); } + else { $mot = CleanFromCSSA(DecodeEncodedString($key)); } my $p; if ($TotalKeywords) { $p=int($_keywords{$key}/$TotalKeywords*1000)/10; } - print "$mot$_keywords{$key}$p %\n"; + print "".XMLEncode($mot)."$_keywords{$key}$p %\n"; $total_s += $_keywords{$key}; $count++; } @@ -9688,7 +9672,7 @@ if (scalar keys %HTMLOutput) { } if ($ShowKeyphrasesStats && $ShowKeywordsStats) { print "\n"; } - print "\n \n\n"; + print "\n \n\n"; # BY MISC #---------------------------- diff --git a/wwwroot/cgi-bin/plugins/decodeutfkeys.pm b/wwwroot/cgi-bin/plugins/decodeutfkeys.pm new file mode 100644 index 00000000..b0b754e9 --- /dev/null +++ b/wwwroot/cgi-bin/plugins/decodeutfkeys.pm @@ -0,0 +1,75 @@ +#!/usr/bin/perl +#----------------------------------------------------------------------------- +# decodeUTFKeys AWStats plugin +# Allow AWStats to convert keywords strings coded by some search engines in +# UTF8 coding to a common string in a local charset. +#----------------------------------------------------------------------------- +# Perl Required Modules: Encode and URI::Escape +#----------------------------------------------------------------------------- +# $Revision$ - $Author$ - $Date$ + + +# <----- +# ENTER HERE THE USE COMMAND FOR ALL REQUIRED PERL MODULES +if (!eval ('require "Encode.pm"')) { return $@?"Error: $@":"Error: Need Perl module Encode"; } +if (!eval ('require "URI/Escape.pm"')) { return $@?"Error: $@":"Error: Need Perl module URI::Escape"; } +#if (!eval ('require "HTML/Entities.pm"')) { return $@?"Error: $@":"Error: Need Perl module HTML::Entities"; } +# -----> +use strict;no strict "refs"; + + + +#----------------------------------------------------------------------------- +# PLUGIN VARIABLES +#----------------------------------------------------------------------------- +# <----- +# ENTER HERE THE MINIMUM AWSTATS VERSION REQUIRED BY YOUR PLUGIN +# AND THE NAME OF ALL FUNCTIONS THE PLUGIN MANAGE. +my $PluginNeedAWStatsVersion="6.0"; +my $PluginHooksFunctions="DecodeKey"; +# -----> + +# <----- +# IF YOUR PLUGIN NEED GLOBAL VARIABLES, THEY MUST BE DECLARED HERE. +use vars qw/ +/; +# -----> + + + +#----------------------------------------------------------------------------- +# PLUGIN FUNCTION: Init_pluginname +#----------------------------------------------------------------------------- +sub Init_decodeutfkeys { + my $InitParams=shift; + + # <----- + # ENTER HERE CODE TO DO INIT PLUGIN ACTIONS + # -----> + + my $checkversion=&Check_Plugin_Version($PluginNeedAWStatsVersion); + return ($checkversion?$checkversion:"$PluginHooksFunctions"); +} + + +#------------------------------------------------------------------------------ +# Function: Converts an UTF8 string to specified Charset +# Parameters: utfstringtodecode charsettoencode +# Return: newencodedstring +#------------------------------------------------------------------------------ +sub DecodeKey_decodeutfkeys { + my $string = shift; + my $encoding = shift; + if (! $encoding) { error("Function DecodeKey from plugin decodeutfkeys was called but AWStats don't know language code required to output new value."); } + $string=URI::Escape::uri_unescape($string); + if ( $string =~ m/^([\x00-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf])*$/ ) + { + $string=Encode::encode($encoding, Encode::decode("utf-8", $string)); + } + #$string=HTML::Entities::encode_entities($string); + $string =~ s/;+/ /g; + return $string; +} + + +1; # Do not remove this line diff --git a/wwwroot/cgi-bin/plugins/timezone.pm b/wwwroot/cgi-bin/plugins/timezone.pm index 61b9e6a4..3d488000 100644 --- a/wwwroot/cgi-bin/plugins/timezone.pm +++ b/wwwroot/cgi-bin/plugins/timezone.pm @@ -10,7 +10,7 @@ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -# !!!!! This plugin reduces AWStats speed by 30% !!!!! +# !!!!! This plugin reduces AWStats speed by 40% !!!!! # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # <----- # ENTER HERE THE USE COMMAND FOR ALL REQUIRED PERL MODULES