use Time::Local; # use Time::Local 'timelocal_nocheck' is not supported by all Time::Local modules
# Next 'use' can be uncommented (with its coupled line into GetDelaySinceStart function) to
# get miliseconds time in showsteps option
-use Time::HiRes qw( gettimeofday );
+#use Time::HiRes qw( gettimeofday );
+
#-------------------------------------------------------
$DEBUGFORCED
$MaxRowsInHTMLOutput
$VisitTimeOut
+$VisitTolerance
$NbOfLinesForBenchmark
$ShowBackLink
$WIDTH
$DEBUGFORCED = 0; # Force debug level to log lesser level into debug.log file (Keep this value to 0)
$MaxRowsInHTMLOutput = 1000; # Max number of rows for not limited HTML arrays
$VisitTimeOut = 10000; # Laps of time to consider a page load as a new visit. 10000 = one hour (Default = 10000)
+$VisitTolerance= 100; # Laps of time to accept a record if not in correct order. 100 = one minute (Default = 100)
$NbOfLinesForBenchmark=5000;
$ShowBackLink = 1;
$WIDTH = 600;
$MinHitFile $MinHitHost $MinHitKeyword
$MinHitLogin $MinHitRefer $MinHitRobot
$NbOfLinesRead $NbOfLinesDropped $NbOfLinesCorrupted $NbOfOldLines $NbOfNewLines
-$NowNewLinePhase $NbOfLinesForCorruptedLog $PurgeLogFile
+$NewLinePhase $NbOfLinesForCorruptedLog $PurgeLogFile
$ShowAuthenticatedUsers $ShowCompressionStats $ShowFileSizesStats
$ShowDropped $ShowCorrupted $ShowUnknownOrigin $ShowLinksToWhoIs
$SplitSearchString $StartSeconds $StartMicroseconds
$MinHitFile, $MinHitHost, $MinHitKeyword,
$MinHitLogin, $MinHitRefer, $MinHitRobot,
$NbOfLinesRead, $NbOfLinesDropped, $NbOfLinesCorrupted, $NbOfOldLines, $NbOfNewLines,
-$NowNewLinePhase, $NbOfLinesForCorruptedLog, $PurgeLogFile,
+$NewLinePhase, $NbOfLinesForCorruptedLog, $PurgeLogFile,
$ShowAuthenticatedUsers, $ShowCompressionStats, $ShowFileSizesStats,
$ShowDropped, $ShowCorrupted, $ShowUnknownOrigin, $ShowLinksToWhoIs,
$SplitSearchString, $StartSeconds, $StartMicroseconds,
my @field=split(/\s+/,$_);
if (! $field[0]) { next; }
if ($field[0] eq "FirstTime") { $FirstTime{$year.$month}=int($field[1]); next; }
- if ($field[0] eq "LastLine") { if ($LastLine{$year.$month} < int($field[1])) { $LastLine{$year.$month}=int($field[1]); }; next; }
+ if ($field[0] eq "LastLine") { if (($LastLine{$year.$month}||0) < int($field[1])) { $LastLine{$year.$month}=int($field[1]); }; next; } # Keep the highest LastLine seen; parentheses are required because '<' binds tighter than '||'
if ($field[0] eq "FirstTime") { $FirstTime{$year.$month}=int($field[1]); next; }
- if ($field[0] eq "LastTime") { if ($LastTime{$year.$month} < int($field[1])) { $LastTime{$year.$month}=int($field[1]); }; next; }
+ if ($field[0] eq "LastTime") { if (($LastTime{$year.$month}||0) < int($field[1])) { $LastTime{$year.$month}=int($field[1]); }; next; } # Keep the highest LastTime seen; parentheses are required because '<' binds tighter than '||'
if ($field[0] eq "TotalVisits") { $MonthVisits{$year.$month}=int($field[1]); next; }
if ($field[0] eq "LastUpdate") {
- if ($LastUpdate{$year.$month} < $field[1]) {
+ if (($LastUpdate{$year.$month}||0) < $field[1]) { # Default an unset entry to 0 before comparing; parentheses are required because '<' binds tighter than '||'
$LastUpdate{$year.$month}=int($field[1]);
#$LastUpdateLinesRead{$year.$month}=int($field[2]);
#$LastUpdateNewLinesRead{$year.$month}=int($field[3]);
if ($option) { $StartSeconds=0; } # Reset counter
my ($newseconds, $newmicroseconds)=(0,0);
my $usedTimeHires=0;
- ($newseconds, $newmicroseconds) = gettimeofday; $usedTimeHires=1; # Uncomment to use Time::HiRes function (provide milliseconds)
+# ($newseconds, $newmicroseconds) = gettimeofday; $usedTimeHires=1; # Uncomment to use Time::HiRes function (provide milliseconds)
if ((! $usedTimeHires) || ($newseconds eq "gettimeofday")) { $newseconds=time(); }
if (! $StartSeconds) { $StartSeconds=$newseconds; $StartMicroseconds=$newmicroseconds; }
my $nbms=$newseconds*1000+int($newmicroseconds/1000)-$StartSeconds*1000-int($StartMicroseconds/1000);
# READING THE LAST PROCESSED HISTORY FILE
#------------------------------------------
- my $monthtoprocess=0; my $yeartoprocess=0;
+ my $monthtoprocess=0; my $yeartoprocess=0; my $yearmonthtoprocess="";
# Search last history file $PROG(MM)(YYYY)$FileSuffix.txt
my $yearmonthmax=0;
my @filearray = sort readdir DIR;
close DIR;
foreach my $i (0..$#filearray) {
- if ("$filearray[$i]" =~ /^$PROG([\d][\d])([\d][\d][\d][\d])$FileSuffix\.txt$/) {
+ if ("$filearray[$i]" =~ /^$PROG(\d\d)(\d\d\d\d)$FileSuffix\.txt$/) {
if (int("$2$1") > $yearmonthmax) { $yearmonthmax=int("$2$1"); }
}
}
# We read last history file if found
- if ($yearmonthmax =~ /^([\d][\d][\d][\d])([\d][\d])$/) {
+ if ($yearmonthmax =~ /^(\d\d\d\d)(\d\d)$/) {
$monthtoprocess=int($2);$yeartoprocess=int($1);
# We read LastTime in this last history file.
&Read_History_File($yeartoprocess,$monthtoprocess,1);
# PROCESSING CURRENT LOG
#------------------------------------------
if ($Debug) { debug("Start of processing log file (monthtoprocess=$monthtoprocess, yeartoprocess=$yeartoprocess)"); }
- my $yearmonthtoprocess=sprintf("%04i%02i",$yeartoprocess,$monthtoprocess);
+ $yearmonthtoprocess=sprintf("%04i%02i",$yeartoprocess,$monthtoprocess);
$NbOfLinesRead=$NbOfLinesDropped=$NbOfLinesCorrupted=$NbOfOldLines=$NbOfNewLines=0;
- $NowNewLinePhase=0;
# Open log file
if ($Debug) { debug("Open log file \"$LogFile\""); }
# Skip if not a new line
#-----------------------
- if ($NowNewLinePhase) {
- if ($timerecord < $LastLine{$yearmonthtoprocess}) {
- $NbOfLinesCorrupted++; if ($ShowCorrupted) { print "Corrupted record (not sorted record): $_\n"; } next;
- } # Should not happen, kept in case of parasite/corrupted old line
+ if ($NewLinePhase) {
+ if ($timerecord < $LastLine{$yearmonthtoprocess} - $VisitTolerance) {
+ # Should not happen, kept in case of parasite/corrupted old line
+ $NbOfLinesCorrupted++; if ($ShowCorrupted) { print "Corrupted record (not sorted record): $_\n"; } next;
+ }
}
else {
if ($timerecord <= $LastLine{$yearmonthtoprocess}) {
next;
} # Already processed
# We found a new line. This will stop comparison "<=" between timerecord and LastLine (we should have only new lines now)
- $NowNewLinePhase=1;
+ $NewLinePhase=1;
if ($ShowSteps) { print "Phase 2 : Now process new records\n"; }
#GetDelaySinceStart(1);
}
# Skip for some client host IP addresses, some URLs, other URLs # !!!
my $qualifdrop="";
if (@SkipHosts && &SkipHost($field[$pos_rc])) { $qualifdrop="Dropped record (host $field[$pos_rc] not qualified by SkipHosts)"; }
- if (@SkipFiles && &SkipFile($field[$pos_url])) { $qualifdrop="Dropped record (URL $field[$pos_url] not qualified by SkipFiles)"; }
- if (@OnlyFiles && ! &OnlyFile($field[$pos_url])) { $qualifdrop="Dropped record (URL $field[$pos_url] not qualified by OnlyFiles)"; }
+ elsif (@SkipFiles && &SkipFile($field[$pos_url])) { $qualifdrop="Dropped record (URL $field[$pos_url] not qualified by SkipFiles)"; }
+ elsif (@OnlyFiles && ! &OnlyFile($field[$pos_url])) { $qualifdrop="Dropped record (URL $field[$pos_url] not qualified by OnlyFiles)"; }
if ($qualifdrop) {
$NbOfLinesDropped++;
if ($ShowDropped) { print "$qualifdrop: $_\n"; }
if ($timerecord < $timehostl) {
# Record is before record already read for this host and used for start of visit
# This occurs when log file is 'nearly' sorted
- print "xxxxxxxxxxxxxxxxxx";
+ # TODO change hostmachine_l and hostmachine_s and hostmachine_u
$_hostmachine_p{$_}++;
$_hostmachine_l{$_}=$timerecord;
$_hostmachine_u{$_}=$field[$pos_url];