From: eldy <> Date: Mon, 22 Dec 2003 19:52:29 +0000 (+0000) Subject: Added LevelForKeywordsDetection parameter. X-Git-Tag: AWSTATS_6_0_RELEASE~57 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3d24882eb535f1676f99f69ec6aef4487fcb314c;p=thirdparty%2FAWStats.git Added LevelForKeywordsDetection parameter. --- diff --git a/docs/awstats_config.html b/docs/awstats_config.html index de67906d..ea3720db 100644 --- a/docs/awstats_config.html +++ b/docs/awstats_config.html @@ -112,6 +112,7 @@ when reading it), follow the example:
  • LevelForRefererAnalyze
  • LevelForRobotsDetection
  • LevelForSearchEnginesDetection +
  • LevelForKeywordsDetection
  • LevelForFileTypesDetection
  • LevelForWormsDetection @@ -1177,7 +1178,7 @@ AWStats 4.1+ supports both keywords AND keyphrases by default with no need of an LevelFor
    Version : 4.0+
    -6.0+ for LevelForFileTypesDetection, LevelForSearchEnginesDetection
    +6.0+ for LevelForFileTypesDetection, LevelForSearchEnginesDetection, LevelForKeywordsDetection, LevelForWormsDetection

    # The following values allow you to define the accuracy of AWStats entities
    # (robots, browsers, os, referers, file types) detection.
    # It is recommended that very important web sites or ISPs that provide AWStats @@ -1191,9 +1192,10 @@ AWStats 4.1+ supports both keywords AND keyphrases by default with no need of an
    #
    LevelForBrowsersDetection=2 # 0 disables Browsers detection.
    LevelForOSDetection=2 # 0 disables OS detection. -
    LevelForRefererAnalyze=2 # 0 disables origin detection. +
    LevelForRefererAnalyze=2 # 0 disables Origin detection.
    LevelForRobotsDetection=2 # 0 disables Robots detection.
    LevelForSearchEnginesDetection=2 # 0 disables Search engines detection. +
    LevelForKeywordsDetection=2 # 0 disables Keyphrases/Keywords detection.
    LevelForFileTypesDetection=2 # 0 disables File types detection.
    LevelForWormsDetection=0 # 0 disables Worms detection. diff --git a/tools/webmin/awstats/edit_config.cgi b/tools/webmin/awstats/edit_config.cgi index bfe25cb6..8a2d4d98 100644 --- a/tools/webmin/awstats/edit_config.cgi +++ b/tools/webmin/awstats/edit_config.cgi @@ -226,14 +226,18 @@ print "
    \n"; print "
    OPTIONAL ACCURACY SETUP SECTION (Not required but increase AWStats features)

    \n"; if ($in{'advanced'} == 2) { - print " LevelForRobotsDetection "; - print &hblink($text{'help_help'}, "help.cgi?param=LevelForRobotsDetection")." \n"; print " LevelForBrowsersDetection "; print &hblink($text{'help_help'}, "help.cgi?param=LevelForBrowsersDetection")." \n"; print " LevelForOSDetection "; print &hblink($text{'help_help'}, "help.cgi?param=LevelForOSDetection")." \n"; print " LevelForRefererAnalyze "; print &hblink($text{'help_help'}, "help.cgi?param=LevelForRefererAnalyze")." \n"; + print " LevelForRobotsDetection "; + print &hblink($text{'help_help'}, "help.cgi?param=LevelForRobotsDetection")." \n"; + print " LevelForSearchEnginesDetection "; + print &hblink($text{'help_help'}, "help.cgi?param=LevelForSearchEnginesDetection")." \n"; + print " LevelForKeywordsDetection "; + print &hblink($text{'help_help'}, "help.cgi?param=LevelForKeywordsDetection")." \n"; print " LevelForFileTypesDetection "; print &hblink($text{'help_help'}, "help.cgi?param=LevelForFileTypesDetection")." \n"; print " LevelForWormsDetection "; diff --git a/wwwroot/cgi-bin/awstats.model.conf b/wwwroot/cgi-bin/awstats.model.conf index dd4dad8d..6336d554 100644 --- a/wwwroot/cgi-bin/awstats.model.conf +++ b/wwwroot/cgi-bin/awstats.model.conf @@ -780,6 +780,7 @@ LevelForOSDetection=2 # 0 disables OS detection. LevelForRefererAnalyze=2 # 0 disables Origin detection. LevelForRobotsDetection=2 # 0 disables Robots detection. LevelForSearchEnginesDetection=2 # 0 disables Search engines detection. +LevelForKeywordsDetection=2 # 0 disables Keyphrases/Keywords detection. LevelForFileTypesDetection=2 # 0 disables File types detection. LevelForWormsDetection=0 # 0 disables Worms detection. 
diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl index f83cc13d..d83bb8a5 100644 --- a/wwwroot/cgi-bin/awstats.pl +++ b/wwwroot/cgi-bin/awstats.pl @@ -6504,40 +6504,43 @@ if ($UpdateStats && $FrameName ne 'index' && $FrameName ne 'mainleft') { # Updat $_from_h[2]++; $_se_referrals_h{$TmpRefererServer{$refererserver}}++; $found=1; - my @refurl=split(/\?/,$field[$pos_referer],2); # TODO Use \? or [$URLQuerySeparators] ? - if ($refurl[1]) { - # Extract params of referer query string (q=cache:mmm:www/zzz+aaa+bbb q=aaa+bbb/ccc key=ddd%20eee lang_en ie=UTF-8 ...) - my @paramlist=split(/&/,$KeyWordsNotSensitive?lc($refurl[1]):$refurl[1]); - if ($SearchEnginesKnownUrl{$TmpRefererServer{$refererserver}}) { # Search engine with known URL syntax - foreach my $param (@paramlist) { - if ($param =~ s/^$SearchEnginesKnownUrl{$TmpRefererServer{$refererserver}}//) { + if ($LevelForKeywordsDetection) { + my @refurl=split(/\?/,$field[$pos_referer],2); # TODO Use \? or [$URLQuerySeparators] ? + if ($refurl[1]) { + # Extract params of referer query string (q=cache:mmm:www/zzz+aaa+bbb q=aaa+bbb/ccc key=ddd%20eee lang_en ie=UTF-8 ...) 
+ if ($SearchEnginesKnownUrl{$TmpRefererServer{$refererserver}}) { # Search engine with known URL syntax + my @paramlist=split(/&/,$KeyWordsNotSensitive?lc($refurl[1]):$refurl[1]); + foreach my $param (@paramlist) { + if ($param =~ s/^$SearchEnginesKnownUrl{$TmpRefererServer{$refererserver}}//) { + # We found good parameter + # Now param is keyphrase: "cache:mmm:www/zzz+aaa+bbb/ccc+ddd%20eee'fff,ggg" + $param =~ s/^(cache|related):[^\+]+//; + &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg] + $param =~ s/^ +//; $param =~ s/ +$//; $param =~ tr/ /\+/s; + if ((length $param) > 0) { $_keyphrases{$param}++; } + last; + } + } + } + elsif ($LevelForKeywordsDetection >= 2) { # Search engine with unknown URL syntax + my @paramlist=split(/&/,$KeyWordsNotSensitive?lc($refurl[1]):$refurl[1]); + foreach my $param (@paramlist) { + my $foundexcludeparam=0; + foreach my $paramtoexclude (@WordsToCleanSearchUrl) { + if ($param =~ /$paramtoexclude/i) { $foundexcludeparam=1; last; } # Not the param with search criteria + } + if ($foundexcludeparam) { next; } # We found good parameter - # Now param is keyphrase: "cache:mmm:www/zzz+aaa+bbb/ccc+ddd%20eee'fff,ggg" + $param =~ s/.*=//; + # Now param is keyphrase: "aaa+bbb/ccc+ddd%20eee'fff,ggg" $param =~ s/^(cache|related):[^\+]+//; - &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg] + &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg ] $param =~ s/^ +//; $param =~ s/ +$//; $param =~ tr/ /\+/s; - if ((length $param) > 0) { $_keyphrases{$param}++; } - last; + if ((length $param) > 2) { $_keyphrases{$param}++; last; } } } - } - else { # Search engine with unknown URL syntax - foreach my $param (@paramlist) { - my $foundexcludeparam=0; - foreach my $paramtoexclude (@WordsToCleanSearchUrl) { - if ($param =~ /$paramtoexclude/i) { 
$foundexcludeparam=1; last; } # Not the param with search criteria - } - if ($foundexcludeparam) { next; } - # We found good parameter - $param =~ s/.*=//; - # Now param is keyphrase: "aaa+bbb/ccc+ddd%20eee'fff,ggg" - $param =~ s/^(cache|related):[^\+]+//; - &ChangeWordSeparatorsIntoSpace($param); # Change [ aaa+bbb/ccc+ddd%20eee'fff,ggg ] into [ aaa bbb/ccc ddd eee fff ggg ] - $param =~ s/^ +//; $param =~ s/ +$//; $param =~ tr/ /\+/s; - if ((length $param) > 2) { $_keyphrases{$param}++; last; } - } - } - } # End of if refurl[1] + } # End of if refurl[1] + } } } # End of if ($TmpRefererServer) else {