From 0b29f585224f4a98b1e864db28e51ed5af5ca6d3 Mon Sep 17 00:00:00 2001
From: eldy <>
Date: Fri, 28 Jun 2002 13:55:43 +0000
Subject: [PATCH] Better quality of code
---
wwwroot/cgi-bin/awstats.pl | 346 ++++++++++++++++++++-----------------
1 file changed, 192 insertions(+), 154 deletions(-)
diff --git a/wwwroot/cgi-bin/awstats.pl b/wwwroot/cgi-bin/awstats.pl
index 11288fc0..0a49b2f7 100644
--- a/wwwroot/cgi-bin/awstats.pl
+++ b/wwwroot/cgi-bin/awstats.pl
@@ -11,15 +11,15 @@
#-------------------------------------------------------
# $Revision$ - $Author$ - $Date$
-use strict;no strict "refs";
#use warnings; # Must be used in test mode only. This reduce a little process speed
#use diagnostics; # Must be used in test mode only. This reduce a lot of process speed
+use strict;no strict "refs";
use Socket;
use Time::Local; # use Time::Local 'timelocal_nocheck' is faster but not supported by all Time::Local modules
use vars qw/ $UseHiRes $UseCompress /;
# Next 'use' can be uncommented to get miliseconds time in showsteps option
-#use Time::HiRes qw( gettimeofday ); $UseHiRes=1;
+use Time::HiRes qw( gettimeofday ); $UseHiRes=1;
# Next 'use' can be uncommented to allow read/write of gz compressed log or history files (not working yet)
#use Compress::Zlib; $UseCompress=1;
@@ -35,45 +35,41 @@ my $VERSION="4.1 (build $REVISION)";
# ---------- Init variables -------
use vars qw/
+$DIR $PROG $Extension
$Debug
$ShowSteps
+$Lang
$AWScript
-$DIR
-$PROG
-$Extension
-$DNSLookup
$DirCgi
$DirData
$DirIcons
$DirLang
+$LogSeparator
+$KeyWordsNotSensitive
+$DNSLookup
$DNSLookupAlreadyDone
-$Lang
$DEBUGFORCED
-$KeyWordsNotSensitive
$MaxRowsInHTMLOutput
$VisitTimeOut
$VisitTolerance
$NbOfLinesForBenchmark
$WIDTH
$CENTER
-$PreviousHost
/;
-# TODO $PreviousHost Check if this enhance speed
+$DIR=$PROG=$Extension="";
$Debug=0;
$ShowSteps=0;
+$Lang="en";
$AWScript="";
-$DIR="";
-$PROG="";
-$Extension="";
-$DNSLookup=0;
$DirCgi="";
$DirData="";
$DirIcons="";
$DirLang="";
+$LogSeparator="\\s";
+$KeyWordsNotSensitive = 1; # Keywords are not case sensitive
+$DNSLookup=0;
$DNSLookupAlreadyDone=0;
-$Lang="en";
$DEBUGFORCED = 0; # Force debug level to log lesser level into debug.log file (Keep this value to 0)
-$KeyWordsNotSensitive = 1; # Keywords are not case sensitive
$MaxRowsInHTMLOutput = 1000; # Max number of rows for not limited HTML arrays
$VisitTimeOut = 10000; # Laps of time to consider a page load as a new visit. 10000 = 1 hour (Default = 10000)
$VisitTolerance= 10000; # Laps of time to accept a record if not in correct order. 10000 = 1 hour (Default = 10000)
@@ -112,9 +108,6 @@ $nowtime $tomorrowtime
$nowweekofmonth $nowdaymod $nowsmallyear
$nowsec $nowmin $nowhour $nowday $nowmonth $nowyear $nowwday $nowns
/;
-$nowtime = $tomorrowtime = 0;
-$nowweekofmonth = $nowdaymod = $nowsmallyear = 0;
-$nowsec = $nowmin = $nowhour = $nowday = $nowmonth = $nowyear = $nowwday = $nowns = 0;
use vars qw/
$AllowAccessFromWebToAuthenticatedUsersOnly $BarHeight $BarWidth $DebugResetDone
$Expires $CreateDirDataIfNotExists $KeepBackupOfHistoricFiles $MaxLengthOfURL
@@ -191,13 +184,13 @@ $color_h, $color_k, $color_p, $color_e, $color_x, $color_s, $color_u, $color_v)=
("","","","","","","","","","","","","","","","","","","","","");
use vars qw/
$HTMLOutput $FileConfig $FileSuffix $Host $DayRequired $MonthRequired $YearRequired
-$QueryString $SiteConfig $StaticLinks $URLFilter $PageCode $LogFormatString $PerlParsingFormat
+$QueryString $SiteConfig $StaticLinks $URLFilter $PageCode $PerlParsingFormat
$SiteToAnalyze $SiteToAnalyzeWithoutwww $UserAgent
/;
($HTMLOutput, $FileConfig, $FileSuffix, $Host, $DayRequired, $MonthRequired, $YearRequired,
-$QueryString, $SiteConfig, $StaticLinks, $URLFilter, $PageCode, $LogFormatString, $PerlParsingFormat,
-$SiteToAnalyze, $SiteToAnalyzeWithoutwww, $UserAgent, $PreviousHost)=
-("","","","","","","","","","","","","","","","","","");
+$QueryString, $SiteConfig, $StaticLinks, $URLFilter, $PageCode, $PerlParsingFormat,
+$SiteToAnalyze, $SiteToAnalyzeWithoutwww, $UserAgent)=
+("","","","","","","","","","","","","","","","");
use vars qw/
$pos_vh $pos_rc $pos_logname $pos_date $pos_method $pos_url $pos_code $pos_size
$pos_referer $pos_agent $pos_query $pos_gzipin $pos_gzipout $pos_gzipratio
@@ -416,6 +409,11 @@ use vars qw/ %httpcodewithtooltips /;
# Functions
#-------------------------------------------------------
+#------------------------------------------------------------------------------
+# Function: Write on output header of HTML page
+# Input: $HTMLOutput $PageCode $Expires
+# Output: -
+#------------------------------------------------------------------------------
sub html_head {
if ($HTMLOutput) {
# Write head section
@@ -454,7 +452,6 @@ DIV { font: 12px arial,verdana,helvetica; text-align:justify; }
\@media projection {
.tablecontainer { page-break-before: always; }
}
-
//-->
EOF
@@ -486,6 +483,11 @@ EOF
}
+#------------------------------------------------------------------------------
+# Function: Write on output end of HTML page
+# Input: -
+# Output: -
+#------------------------------------------------------------------------------
sub html_end {
if ($HTMLOutput) {
print "$CENTER
\n";
@@ -528,6 +530,11 @@ sub tab_end {
print "\n\n";
}
+#------------------------------------------------------------------------------
+# Function: Write error message and exit
+# Input: -
+# Output: -
+#------------------------------------------------------------------------------
sub error {
my $message=shift||"";
my $secondmessage=shift||"";
@@ -594,6 +601,11 @@ sub error {
exit 1;
}
+#------------------------------------------------------------------------------
+# Function: Write a warning message
+# Input: -
+# Output: -
+#------------------------------------------------------------------------------
sub warning {
my $messagestring=shift;
if ($Debug) { debug("$messagestring",1); }
@@ -608,8 +620,12 @@ sub warning {
}
}
-# Parameters : $string $level
-# Input : $Debug = required level $DEBUGFORCED = required level forced
+#------------------------------------------------------------------------------
+# Function: Write a debug message
+# Parameters: $string $level
+# Input: $Debug = required level $DEBUGFORCED = required level forced
+# Output: -
+#------------------------------------------------------------------------------
sub debug {
my $level = $_[1] || 1;
if ($level <= $DEBUGFORCED) {
@@ -626,26 +642,51 @@ sub debug {
}
}
+#------------------------------------------------------------------------------
+# Function: Check if parameter is in SkipHosts array
+# Input: host @SkipHosts
+# Output: 0 Not found, 1 Found
+#------------------------------------------------------------------------------
sub SkipHost {
foreach my $match (@SkipHosts) { if ($_[0] =~ /$match/i) { return 1; } }
0; # Not in @SkipHosts
}
+#------------------------------------------------------------------------------
+# Function: Check if parameter is in SkipFiles array
+# Input: url @SkipFiles
+# Output: 0 Not found, 1 Found
+#------------------------------------------------------------------------------
sub SkipFile {
foreach my $match (@SkipFiles) { if ($_[0] =~ /$match/i) { return 1; } }
0; # Not in @SkipFiles
}
+#------------------------------------------------------------------------------
+# Function: Check if parameter is in OnlyFiles array
+# Input: url @OnlyFiles
+# Output: 0 Not found, 1 Found
+#------------------------------------------------------------------------------
sub OnlyFile {
foreach my $match (@OnlyFiles) { if ($_[0] =~ /$match/i) { return 1; } }
0; # Not in @OnlyFiles
}
+#------------------------------------------------------------------------------
+# Function: Check if parameter is in SkipDNSLookupFor array
+# Input: host @SkipDNSLookupFor
+# Output: 0 Not found, 1 Found
+#------------------------------------------------------------------------------
sub SkipDNSLookup {
foreach my $match (@SkipDNSLookupFor) { if ($_[0] =~ /$match/i) { return 1; } }
0; # Not in @SkipDNSLookupFor
}
+#------------------------------------------------------------------------------
+# Function: Return day of week of a day
+# Input: $day $month $year
+# Output: 0-6
+#------------------------------------------------------------------------------
sub DayOfWeek {
my ($day, $month, $year) = @_;
if ($Debug) { debug("DayOfWeek for $day $month $year",4); }
@@ -756,8 +797,7 @@ sub Read_Config_File {
# Read optional setup section
if ($param =~ /^AllowAccessFromWebToAuthenticatedUsersOnly/) { $AllowAccessFromWebToAuthenticatedUsersOnly=$value; next; }
if ($param =~ /^AllowAccessFromWebToFollowingAuthenticatedUsers/) {
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @AllowAccessFromWebToFollowingAuthenticatedUsers,$elem; }
+ foreach my $elem (split(/\s+/,$value)) { push @AllowAccessFromWebToFollowingAuthenticatedUsers,$elem; }
next;
}
if ($param =~ /^CreateDirDataIfNotExists/) { $CreateDirDataIfNotExists=$value; next; }
@@ -768,37 +808,31 @@ sub Read_Config_File {
if ($param =~ /^DefaultFile/) { $DefaultFile=$value; next; }
if ($param =~ /^SkipHosts/) {
$value =~ s/\\\./\./g; $value =~ s/([^\\])\./$1\\\./g; $value =~ s/^\./\\\./; # Replace . into \.
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @SkipHosts,$elem; }
+ foreach my $elem (split(/\s+/,$value)) { push @SkipHosts,$elem; }
next;
}
if ($param =~ /^SkipDNSLookupFor/) {
$value =~ s/\\\./\./g; $value =~ s/([^\\])\./$1\\\./g; $value =~ s/^\./\\\./; # Replace . into \.
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @SkipDNSLookupFor,$elem; }
+ foreach my $elem (split(/\s+/,$value)) { push @SkipDNSLookupFor,$elem; }
next;
}
if ($param =~ /^SkipFiles/) {
$value =~ s/\\\./\./g; $value =~ s/([^\\])\./$1\\\./g; $value =~ s/^\./\\\./; # Replace . into \.
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @SkipFiles,$elem; }
+ foreach my $elem (split(/\s+/,$value)) { push @SkipFiles,$elem; }
next;
}
if ($param =~ /^OnlyFiles/) {
$value =~ s/\\\./\./g; $value =~ s/([^\\])\./$1\\\./g; $value =~ s/^\./\\\./; # Replace . into \.
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @OnlyFiles,$elem; }
+ foreach my $elem (split(/\s+/,$value)) { push @OnlyFiles,$elem; }
next;
}
if ($param =~ /^NotPageList/) {
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { $NotPageList{$elem}=1; }
+ foreach my $elem (split(/\s+/,$value)) { $NotPageList{$elem}=1; }
$foundNotPageList=1;
next;
}
if ($param =~ /^ValidHTTPCodes/) {
- my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { $ValidHTTPCodes{$elem}=1; }
+ foreach my $elem (split(/\s+/,$value)) { $ValidHTTPCodes{$elem}=1; }
$foundValidHTTPCodes=1;
next;
}
@@ -1335,7 +1369,7 @@ sub Check_Config {
sub Read_History_File {
my $year=sprintf("%04i",shift);
my $month=sprintf("%02i",shift);
- my $part=shift; # If part=0 wee need only TotalVisits, LastUpdate, TIME section and VISITOR section
+ my $part=shift; # If part=0 we need only LastUpdate, TotalVisits, TIME section and VISITOR section
# In standard use of AWStats, the DayRequired variable is always empty
if ($DayRequired) { if ($Debug) { debug("Call to Read_History_File [$year,$month,$part] ($DayRequired)"); } }
@@ -1358,7 +1392,6 @@ sub Read_History_File {
if ($UseCompress) { $historyfilename="gzip -d <\"$historyfilename\" |"; }
if ($Debug) { debug(" History file is '$historyfilename'",2); }
- # TODO If session for read (no update), file can be open with share. So POSSIBLE CHANGE HERE
# TODO Whith particular option file reading can be stopped if section all read
open(HISTORY,$historyfilename) || error("Error: Couldn't open file \"$historyfilename\" for read: $!"); # Month before Year kept for backward compatibility
$MonthUnique{$year.$month}=0; $MonthPages{$year.$month}=0; $MonthHits{$year.$month}=0; $MonthBytes{$year.$month}=0; $MonthHostsKnown{$year.$month}=0; $MonthHostsUnknown{$year.$month}=0;
@@ -1939,7 +1972,7 @@ sub Read_History_File {
if ($loadrecord) {
if ($field[1]) {
if ($loadrecord==2) {
- my @wordarray=split(/\+/,$field[0]); foreach my $word (@wordarray) {
+ foreach my $word (split(/\+/,$field[0])) {
$_keywords{$word}+=$field[1];
}
}
@@ -2286,13 +2319,13 @@ sub Save_History_File {
$keysinkeylist{$key}=1;
my $keyphrase=$key;
print HISTORYTMP "$keyphrase $_keyphrases{$key}\n";
- my @wordarray=split(/\+/,$key); foreach my $word (@wordarray) { $_keywords{$word}+=$_keyphrases{$key}; } # To init %_keywords
+ foreach my $word (split(/\+/,$key)) { $_keywords{$word}+=$_keyphrases{$key}; } # To init %_keywords
}
foreach my $key (keys %_keyphrases) {
if ($keysinkeylist{$key}) { next; }
my $keyphrase=$key;
print HISTORYTMP "$keyphrase $_keyphrases{$key}\n";
- my @wordarray=split(/\+/,$key); foreach my $word (@wordarray) { $_keywords{$word}+=$_keyphrases{$key}; } # To init %_keywords
+ foreach my $word (split(/\+/,$key)) { $_keywords{$word}+=$_keyphrases{$key}; } # To init %_keywords
}
print HISTORYTMP "END_SEARCHWORDS\n";
print HISTORYTMP "\n";
@@ -2336,18 +2369,16 @@ sub Save_History_File {
#--------------------------------------------------------------------
# Function: Return time elapsed since last call in miliseconds
-# Input: None
+# Input: 0|1 (1 reset counter, 0 no reset)
# Return: Number of miliseconds elapsed since last call
#--------------------------------------------------------------------
sub GetDelaySinceStart {
- my $option=shift;
- if ($option) { $StartSeconds=0; } # Reset counter
+ if (shift) { $StartSeconds=0; } # Reset counter
my ($newseconds, $newmicroseconds)=(0,0);
if ($UseHiRes) { ($newseconds, $newmicroseconds) = &gettimeofday; }
else { $newseconds=time(); }
if (! $StartSeconds) { $StartSeconds=$newseconds; $StartMicroseconds=$newmicroseconds; }
- my $nbms=$newseconds*1000+int($newmicroseconds/1000)-$StartSeconds*1000-int($StartMicroseconds/1000);
- return ($nbms);
+ return ($newseconds*1000+int($newmicroseconds/1000)-$StartSeconds*1000-int($StartMicroseconds/1000));
}
#--------------------------------------------------------------------
@@ -2542,6 +2573,11 @@ sub IsAscii {
}
+#--------------------------------------------------------------------
+# Function: Add a value to sorting tree
+# Input:
+# Return:
+#--------------------------------------------------------------------
sub AddInTree {
my $keytoadd=shift;
my $keyval=shift;
@@ -2583,6 +2619,11 @@ sub AddInTree {
# if ($countaddintree % 100 == 0) { if ($Debug) { debug(" AddInTree End of 100",3); } }
}
+#--------------------------------------------------------------------
+# Function: Remove a val from sorting tree
+# Input:
+# Return:
+#--------------------------------------------------------------------
sub Removelowerval {
my $keytoremove=$val{$lowerval}; # This is lower key
if ($Debug) { debug(" remove for lowerval=$lowerval: key=$keytoremove",4); }
@@ -2950,8 +2991,6 @@ if ($UpdateStats) {
if ($Debug) { debug("HostAliases is now @HostAliases",1); }
if ($Debug) { debug("SkipFiles is now @SkipFiles",1); }
- if ($Debug) { debug("Start Update process"); }
-
# GENERATING PerlParsingFormat
#------------------------------------------
# Log example records
@@ -2965,105 +3004,103 @@ if ($UpdateStats) {
# LogFormat "%h %l %u %t \"%r\" %>s %b mod_gzip: %{mod_gzip_compression_ratio}npct." common_with_mod_gzip_info1
# LogFormat "%h %l %u %t \"%r\" %>s %b mod_gzip: %{mod_gzip_result}n In:%{mod_gzip_input_size}n Out:%{mod_gzip_output_size}n:%{mod_gzip_compression_ratio}npct." common_with_mod_gzip_info2
- $LogFormatString=$LogFormat;
- if ($LogFormat eq "1") { $LogFormatString="%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""; }
- if ($LogFormat eq "2") { $LogFormatString="date time c-ip cs-username cs-method cs-uri-stem sc-status sc-bytes cs-version cs(User-Agent) cs(Referer)"; }
- if ($LogFormat eq "4") { $LogFormatString="%h %l %u %t \"%r\" %>s %b"; }
- if ($LogFormat eq "5") { $LogFormatString="c-ip cs-username c-agent sc-authenticated date time s-svcname s-computername cs-referred r-host r-ip r-port time-taken cs-bytes sc-bytes cs-protocol cs-transport s-operation cs-uri cs-mime-type s-object-source sc-status s-cache-info"; }
- # Replacement for Apache format string
- $LogFormatString =~ s/%v(\s)/%virtualname$1/g; $LogFormatString =~ s/%v$/%virtualname/g;
- $LogFormatString =~ s/%h(\s)/%host$1/g; $LogFormatString =~ s/%h$/%host/g;
- $LogFormatString =~ s/%l(\s)/%other$1/g; $LogFormatString =~ s/%l$/%other/g;
- $LogFormatString =~ s/%u(\s)/%logname$1/g; $LogFormatString =~ s/%u$/%logname/g;
- $LogFormatString =~ s/%t(\s)/%time1$1/g; $LogFormatString =~ s/%t$/%time1/g;
- $LogFormatString =~ s/\"%r\"/%methodurl/g;
- $LogFormatString =~ s/%>s/%code/g;
- $LogFormatString =~ s/%b(\s)/%bytesd$1/g; $LogFormatString =~ s/%b$/%bytesd/g;
- $LogFormatString =~ s/\"%{Referer}i\"/%refererquot/g;
- $LogFormatString =~ s/\"%{User-Agent}i\"/%uaquot/g;
- $LogFormatString =~ s/%{mod_gzip_input_size}n/%gzipin/g;
- $LogFormatString =~ s/%{mod_gzip_output_size}n/%gzipout/g;
- $LogFormatString =~ s/%{mod_gzip_compression_ratio}n/%gzipratio/g;
- # Replacement for a IIS and ISA format string
- $LogFormatString =~ s/date\stime/%time2/g;
- $LogFormatString =~ s/c-ip/%host/g;
- $LogFormatString =~ s/cs-username/%logname/g;
- $LogFormatString =~ s/cs-method/%method/g;
- $LogFormatString =~ s/cs-uri-stem/%url/g; $LogFormatString =~ s/cs-uri/%url/g;
- $LogFormatString =~ s/sc-status/%code/g;
- $LogFormatString =~ s/sc-bytes/%bytesd/g;
- $LogFormatString =~ s/cs-version/%other/g; # Protocol
- $LogFormatString =~ s/cs\(User-Agent\)/%ua/g; $LogFormatString =~ s/c-agent/%ua/g;
- $LogFormatString =~ s/cs\(Referer\)/%referer/g; $LogFormatString =~ s/cs-referred/%referer/g;
- $LogFormatString =~ s/cs-uri-query/%host/g;
- $LogFormatString =~ s/sc-authenticated/%other/g;
- $LogFormatString =~ s/s-svcname/%other/g;
- $LogFormatString =~ s/s-computername/%other/g;
- $LogFormatString =~ s/r-host/%other/g;
- $LogFormatString =~ s/r-ip/%other/g;
- $LogFormatString =~ s/r-port/%other/g;
- $LogFormatString =~ s/time-taken/%other/g;
- $LogFormatString =~ s/cs-bytes/%other/g;
- $LogFormatString =~ s/cs-protocol/%other/g;
- $LogFormatString =~ s/cs-transport/%other/g;
- $LogFormatString =~ s/s-operation/%other/g;
- $LogFormatString =~ s/cs-mime-type/%other/g;
- $LogFormatString =~ s/s-object-source/%other/g;
- $LogFormatString =~ s/s-cache-info/%other/g;
- # Generate PerlParsingFormat
- if ($Debug) { debug("Generate PerlParsingFormat from LogFormatString=$LogFormatString"); }
+ if ($Debug) { debug("Generate PerlParsingFormat from LogFormat=$LogFormat"); }
$PerlParsingFormat="";
- if ($LogFormat eq "1") {
- $PerlParsingFormat="([^\\s]+) [^\\s]+ ([^\\s]+) \\[([^\\s]+) [^\\s]+\\] \\\"([^\\s]+) ([^\\s]+) [^\\\"]+\\\" ([\\d|-]+) ([\\d|-]+) \\\"(.*)\\\" \\\"([^\\\"]*)\\\""; # referer and ua might be ""
- $pos_rc=1;$pos_logname=2;$pos_date=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;$pos_referer=8;$pos_agent=9;
- $lastrequiredfield=9;
- }
- if ($LogFormat eq "2") {
- $PerlParsingFormat="([^\\s]+ [^\\s]+) ([^\\s]+) ([^\\s]+) ([^\\s]+) ([^\\s]+) ([\\d|-]+) ([\\d|-]+) [^\\s]+ ([^\\s]+) ([^\\s]+)";
- $pos_date=1;$pos_rc=2;$pos_logname=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;$pos_agent=8;$pos_referer=9;
- $lastrequiredfield=9;
- }
- if ($LogFormat eq "3") {
- $PerlParsingFormat="([^\\t]*\\t[^\\t]*)\\t([^\\t]*)\\t([\\d]*)\\t([^\\t]*)\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t.*:([^\\t]*)\\t([\\d]*)";
- $pos_date=1;$pos_method=2;$pos_code=3;$pos_rc=4;$pos_agent=5;$pos_referer=6;$pos_url=7;$pos_size=8;
- $lastrequiredfield=8;
- }
- if ($LogFormat eq "4") {
- $PerlParsingFormat="([^\\s]*) [^\\s]* ([^\\s]*) \\[([^\\s]*) [^\\s]*\\] \\\"([^\\s]*) ([^\\s]*) [^\\\"]*\\\" ([\\d|-]*) ([\\d|-]*)";
- $pos_rc=1;$pos_logname=2;$pos_date=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;
- $lastrequiredfield=7;
- }
- if ($LogFormat eq "5") {
- $PerlParsingFormat="([^\\t]*)\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t([^\\t]*\\t[^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*";
- $pos_rc=1;$pos_logname=2;$pos_agent=3;$pos_date=4;$pos_referer=5;$pos_size=6;$pos_method=7;$pos_url=8;$pos_code=9;
- $lastrequiredfield=9;
+ if ($LogFormat =~ /^[1-5]$/) { # Pre-defined log format
+ if ($LogFormat eq "1") { # Same than "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
+ $PerlParsingFormat="([^\\s]+) [^\\s]+ ([^\\s]+) \\[([^\\s]+) [^\\s]+\\] \\\"([^\\s]+) ([^\\s]+) [^\\\"]+\\\" ([\\d|-]+) ([\\d|-]+) \\\"(.*)\\\" \\\"([^\\\"]*)\\\""; # referer and ua might be ""
+ $pos_rc=1;$pos_logname=2;$pos_date=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;$pos_referer=8;$pos_agent=9;
+ $lastrequiredfield=9;
+ }
+ elsif ($LogFormat eq "2") { # Same than "date time c-ip cs-username cs-method cs-uri-stem sc-status sc-bytes cs-version cs(User-Agent) cs(Referer)"
+ $PerlParsingFormat="([^\\s]+ [^\\s]+) ([^\\s]+) ([^\\s]+) ([^\\s]+) ([^\\s]+) ([\\d|-]+) ([\\d|-]+) [^\\s]+ ([^\\s]+) ([^\\s]+)";
+ $pos_date=1;$pos_rc=2;$pos_logname=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;$pos_agent=8;$pos_referer=9;
+ $lastrequiredfield=9;
+ }
+ elsif ($LogFormat eq "3") { # Tab-separated fields (date, method, code, host, agent, referer, url, size)
+ $PerlParsingFormat="([^\\t]*\\t[^\\t]*)\\t([^\\t]*)\\t([\\d]*)\\t([^\\t]*)\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t.*:([^\\t]*)\\t([\\d]*)";
+ $pos_date=1;$pos_method=2;$pos_code=3;$pos_rc=4;$pos_agent=5;$pos_referer=6;$pos_url=7;$pos_size=8;
+ $lastrequiredfield=8;
+ }
+ elsif ($LogFormat eq "4") { # Same than "%h %l %u %t \"%r\" %>s %b"
+ $PerlParsingFormat="([^\\s]*) [^\\s]* ([^\\s]*) \\[([^\\s]*) [^\\s]*\\] \\\"([^\\s]*) ([^\\s]*) [^\\\"]*\\\" ([\\d|-]*) ([\\d|-]*)";
+ $pos_rc=1;$pos_logname=2;$pos_date=3;$pos_method=4;$pos_url=5;$pos_code=6;$pos_size=7;
+ $lastrequiredfield=7;
+ }
+ elsif ($LogFormat eq "5") { # Same than "c-ip cs-username c-agent sc-authenticated date time s-svcname s-computername cs-referred r-host r-ip r-port time-taken cs-bytes sc-bytes cs-protocol cs-transport s-operation cs-uri cs-mime-type s-object-source sc-status s-cache-info"
+ $PerlParsingFormat="([^\\t]*)\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t([^\\t]*\\t[^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t([^\\t]*)\\t[^\\t]*\\t[^\\t]*\\t([^\\t]*)\\t[^\\t]*";
+ $pos_rc=1;$pos_logname=2;$pos_agent=3;$pos_date=4;$pos_referer=5;$pos_size=6;$pos_method=7;$pos_url=8;$pos_code=9;
+ $lastrequiredfield=9;
+ }
}
- if ($LogFormat !~ /^[1-5]$/) {
- # Scan $LogFormat to found all required fields and generate PerlParsing
- my @fields = split(/\s+/, $LogFormatString); # make array of entries
+ else { # Personalized log format
+ my $LogFormatString=$LogFormat;
+ # Replacement for Apache format string
+ $LogFormatString =~ s/%v(\s)/%virtualname$1/g; $LogFormatString =~ s/%v$/%virtualname/g;
+ $LogFormatString =~ s/%h(\s)/%host$1/g; $LogFormatString =~ s/%h$/%host/g;
+ $LogFormatString =~ s/%l(\s)/%other$1/g; $LogFormatString =~ s/%l$/%other/g;
+ $LogFormatString =~ s/%u(\s)/%logname$1/g; $LogFormatString =~ s/%u$/%logname/g;
+ $LogFormatString =~ s/%t(\s)/%time1$1/g; $LogFormatString =~ s/%t$/%time1/g;
+ $LogFormatString =~ s/\"%r\"/%methodurl/g;
+ $LogFormatString =~ s/%>s/%code/g;
+ $LogFormatString =~ s/%b(\s)/%bytesd$1/g; $LogFormatString =~ s/%b$/%bytesd/g;
+ $LogFormatString =~ s/\"%{Referer}i\"/%refererquot/g;
+ $LogFormatString =~ s/\"%{User-Agent}i\"/%uaquot/g;
+ $LogFormatString =~ s/%{mod_gzip_input_size}n/%gzipin/g;
+ $LogFormatString =~ s/%{mod_gzip_output_size}n/%gzipout/g;
+ $LogFormatString =~ s/%{mod_gzip_compression_ratio}n/%gzipratio/g;
+ # Replacement for a IIS and ISA format string
+ $LogFormatString =~ s/date\stime/%time2/g;
+ $LogFormatString =~ s/c-ip/%host/g;
+ $LogFormatString =~ s/cs-username/%logname/g;
+ $LogFormatString =~ s/cs-method/%method/g;
+ $LogFormatString =~ s/cs-uri-stem/%url/g; $LogFormatString =~ s/cs-uri/%url/g;
+ $LogFormatString =~ s/sc-status/%code/g;
+ $LogFormatString =~ s/sc-bytes/%bytesd/g;
+ $LogFormatString =~ s/cs-version/%other/g; # Protocol
+ $LogFormatString =~ s/cs\(User-Agent\)/%ua/g; $LogFormatString =~ s/c-agent/%ua/g;
+ $LogFormatString =~ s/cs\(Referer\)/%referer/g; $LogFormatString =~ s/cs-referred/%referer/g;
+ $LogFormatString =~ s/cs-uri-query/%host/g;
+ $LogFormatString =~ s/sc-authenticated/%other/g;
+ $LogFormatString =~ s/s-svcname/%other/g;
+ $LogFormatString =~ s/s-computername/%other/g;
+ $LogFormatString =~ s/r-host/%other/g;
+ $LogFormatString =~ s/r-ip/%other/g;
+ $LogFormatString =~ s/r-port/%other/g;
+ $LogFormatString =~ s/time-taken/%other/g;
+ $LogFormatString =~ s/cs-bytes/%other/g;
+ $LogFormatString =~ s/cs-protocol/%other/g;
+ $LogFormatString =~ s/cs-transport/%other/g;
+ $LogFormatString =~ s/s-operation/%other/g;
+ $LogFormatString =~ s/cs-mime-type/%other/g;
+ $LogFormatString =~ s/s-object-source/%other/g;
+ $LogFormatString =~ s/s-cache-info/%other/g;
+ if ($Debug) { debug("LogFormatString=$LogFormatString"); }
+ # Scan $LogFormatString to found all required fields and generate PerlParsingFormat
+ my @fields = split(/\s+/,$LogFormatString); # make array of entries
my $i = 1;
foreach my $f (@fields) {
my $found=0;
if ($f =~ /%virtualname$/) {
$found=1;
$pos_vh = $i; $i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%host$/) {
$found=1;
$pos_rc = $i; $i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%logname$/) {
$found=1;
$pos_logname = $i; $i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%time1b$/) {
$found=1;
$pos_date = $i;
$i++;
- $PerlParsingFormat .= "\\[([^\\s]*)\\]";
+ $PerlParsingFormat .= "\\[([^$LogSeparator]*)\\]";
}
elsif ($f =~ /%time1$/) {
$found=1;
@@ -3071,13 +3108,13 @@ if ($UpdateStats) {
$i++;
#$pos_zone = $i;
$i++;
- $PerlParsingFormat .= "\\[([^\\s]*) ([^\\s]*)\\]";
+ $PerlParsingFormat .= "\\[([^$LogSeparator]*) ([^$LogSeparator]*)\\]";
}
elsif ($f =~ /%time2$/) {
$found=1;
$pos_date = $i;
$i++;
- $PerlParsingFormat .= "([^\\s]* [^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]* [^$LogSeparator]*)";
}
elsif ($f =~ /%methodurl$/) {
$found=1;
@@ -3085,7 +3122,7 @@ if ($UpdateStats) {
$i++;
$pos_url = $i;
$i++;
- $PerlParsingFormat .= "\\\"([^\\s]*) ([^\\s]*) [^\\\"]*\\\"";
+ $PerlParsingFormat .= "\\\"([^$LogSeparator]*) ([^$LogSeparator]*) [^\\\"]*\\\"";
}
elsif ($f =~ /%methodurlnoprot$/) {
$found=1;
@@ -3093,25 +3130,25 @@ if ($UpdateStats) {
$i++;
$pos_url = $i;
$i++;
- $PerlParsingFormat .= "\\\"([^\\s]*) ([^\\s]*)\\\"";
+ $PerlParsingFormat .= "\\\"([^$LogSeparator]*) ([^$LogSeparator]*)\\\"";
}
elsif ($f =~ /%method$/) {
$found=1;
$pos_method = $i;
$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%url$/) {
$found=1;
$pos_url = $i;
$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%query$/) {
$found=1;
$pos_query = $i;
$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%code$/) {
$found=1;
@@ -3132,7 +3169,7 @@ if ($UpdateStats) {
elsif ($f =~ /%referer$/) {
$found=1;
$pos_referer = $i; $i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%uaquot$/) {
$found=1;
@@ -3142,31 +3179,31 @@ if ($UpdateStats) {
elsif ($f =~ /%ua$/) {
$found=1;
$pos_agent = $i; $i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%gzipin$/ ) {
$found=1;
$pos_gzipin=$i;$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%gzipout/ ) { # Compare $f to /%gzipout/ and not to /%gzipout$/ like other fields
$found=1;
$pos_gzipout=$i;$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%gzipratio/ ) { # Compare $f to /%gzipratio/ and not to /%gzipratio$/ like other fields
$found=1;
$pos_gzipratio=$i;$i++;
- $PerlParsingFormat .= "([^\\s]*)";
+ $PerlParsingFormat .= "([^$LogSeparator]*)";
}
elsif ($f =~ /%syslog$/) { # Added for syslog time and host stamp, fields are skipped and not analyzed
$found=1;
$PerlParsingFormat .= "[A-Z][a-z][a-z] .[0-9] ..:..:.. [A-Za-z]+";
}
- if (! $found) { $found=1; $PerlParsingFormat .= "[^\\s]*"; }
- $PerlParsingFormat.="\\s";
+ if (! $found) { $found=1; $PerlParsingFormat .= "[^$LogSeparator]*"; }
+ $PerlParsingFormat.=$LogSeparator;
}
- if (! $PerlParsingFormat) { error("Error: No recognised format tag in personalized LogFormat string"); }
+ if (! $PerlParsingFormat) { error("Error: No recognized format tag in personalized LogFormat string"); }
chop($PerlParsingFormat); chop($PerlParsingFormat); # Remove last separator char "\s"
$lastrequiredfield=$i--;
}
@@ -3179,6 +3216,8 @@ if ($UpdateStats) {
if ($Debug) { debug("PerlParsingFormat is $PerlParsingFormat"); }
+ if ($Debug) { debug("Start Update process"); }
+
# READING THE LAST PROCESSED HISTORY FILE
#------------------------------------------
my $monthtoprocess=0; my $yeartoprocess=0; my $yearmonthtoprocess="";
@@ -3213,7 +3252,9 @@ if ($UpdateStats) {
if ($Debug) { debug("Open log file \"$LogFile\""); }
open(LOG,"$LogFile") || error("Error: Couldn't open server log file \"$LogFile\" : $!");
- my @field=(); my $counter=0;
+ my @field=();
+ my $counter=0;
+ my $PreviousHost="";
# Reset counter for benchmark (first call to GetDelaySinceStart)
GetDelaySinceStart(1);
if ($ShowSteps) { print "Phase 1 : First bypass old records\n"; }
@@ -3237,8 +3278,6 @@ if ($UpdateStats) {
}
foreach my $i (1..$lastrequiredfield) { $field[$i]=$$i; } # !!!!!
-# @field=Parse($_);
-
if ($Debug) { debug(" Correct format line $NbOfLinesRead : host=\"$field[$pos_rc]\", logname=\"$field[$pos_logname]\", date=\"$field[$pos_date]\", method=\"$field[$pos_method]\", url=\"$field[$pos_url]\", code=\"$field[$pos_code]\", size=\"$field[$pos_size]\", referer=\"$field[$pos_referer]\", agent=\"$field[$pos_agent]\"",3); }
#if ($Debug) { debug("$field[$pos_vh] - $field[$pos_gzipin] - $field[$pos_gzipout] - $field[$pos_gzipratio]\n"); }
@@ -3256,7 +3295,7 @@ if ($UpdateStats) {
if ($field[$pos_method] eq 'GET' || $field[$pos_method] eq 'POST' || $field[$pos_method] eq 'HEAD' || $field[$pos_method] =~ /OK/) {
# HTTP request. Keep only GET, POST, HEAD, *OK* with Webstar but not OPTIONS
$protocol=1;
- }
+ }
elsif ($field[$pos_method] =~ /sent/ || $field[$pos_method] =~ /get/) {
# FTP request.
$protocol=2;
@@ -3338,7 +3377,6 @@ if ($UpdateStats) {
#-------------------
$NbOfNewLines++;
-
# Is it in a new month section ?
#-------------------------------
if ((($monthrecord > $monthtoprocess) && ($yearrecord >= $yeartoprocess)) || ($yearrecord > $yeartoprocess)) {
@@ -3444,9 +3482,9 @@ if ($UpdateStats) {
$_filetypes_h{$extension}++;
$_filetypes_k{$extension}+=$field[$pos_size];
# Compression
- if ($pos_gzipin && $field[$pos_gzipin]) { # Si in et out present
- my ($notused,$in)=split(":",$field[$pos_gzipin]);
- my ($notused1,$out,$notused2)=split(":",$field[$pos_gzipout]);
+ if ($pos_gzipin && $field[$pos_gzipin]) { # If in and out in log
+ my ($notused,$in)=split(/:/,$field[$pos_gzipin]);
+ my ($notused1,$out,$notused2)=split(/:/,$field[$pos_gzipout]);
if ($out) {
$_filetypes_gz_in{$extension}+=$in;
$_filetypes_gz_out{$extension}+=$out;
@@ -3573,7 +3611,7 @@ if ($UpdateStats) {
# if (! $_hostmachine_h{$_}) { $MonthHostsUnknown{$yearmonthtoprocess}++; }
$_hostmachine_h{$_}++;
$_hostmachine_k{$_}+=$field[$pos_size];
- ${PreviousHost}=$_;
+ $PreviousHost=$_;
# Count top-level domain
if ($PageBool) { $_domener_p{$Domain}++; }
--
2.47.3