]> git.ipfire.org Git - thirdparty/AWStats.git/commitdiff
Add logresolvemerge tool
authoreldy <>
Sun, 16 Sep 2001 17:15:31 +0000 (17:15 +0000)
committereldy <>
Sun, 16 Sep 2001 17:15:31 +0000 (17:15 +0000)
wwwroot/cgi-bin/logresolvemerge.pl [new file with mode: 0644]

diff --git a/wwwroot/cgi-bin/logresolvemerge.pl b/wwwroot/cgi-bin/logresolvemerge.pl
new file mode 100644 (file)
index 0000000..5662f1c
--- /dev/null
@@ -0,0 +1,265 @@
+#!/usr/bin/perl
+# With some other Unix Os, first line may be
+#!/usr/local/bin/perl
+# With Apache for Windows and ActivePerl, first line may be
+#!c:/program files/activeperl/bin/perl
+#-Description-------------------------------------------
+# Rewrite a log file, doing a reverse DNS lookup on its IP addresses
+# Merge several log files into one
+# This tool is part of AWStats software
+# See COPYING.TXT file about AWStats GNU General Public License.
+#-------------------------------------------------------
+#use diagnostics;
+#use strict;
+
+
+#-------------------------------------------------------
+# Defines
+#-------------------------------------------------------
+
+# ---------- Init variables (Variable $TmpHashxxx are not initialized) --------
+$Debug=0;                       # Debug level; replaced by N when a "debug=N" argument is given
+# ---------- Init arrays --------
+@wordlist = ();
+@SkipDNSLookupFor = ();         # Regex patterns tested by SkipDNSLookup(); empty list means nothing is skipped
+# ---------- Init hash arrays --------
+%LogFile = ();                  # Log files to merge, keyed by their 1-based position on the command line
+%monthnum = ();
+
+$VERSION="1.0 (build 1)";
+$Lang="en";
+$NbOfLinesForBenchmark=5000;    # With -showsteps, a progress line is written every that many records
+
+# This table is used to make fast reverse DNS lookups for particular IP addresses.
+# You can add your own IP address resolutions (the two entries below are placeholders).
+%MyDNSTable = (
+"256.256.256.1", "myworkstation1",
+"256.256.256.2", "myworkstation2"
+);
+
+
+
+#-------------------------------------------------------
+# Functions
+#-------------------------------------------------------
+# Report a fatal error and stop the program.
+# Parameters: $_[0] = error text (the "Error: " prefix and final "." are added here)
+# Output:     the message is printed on standard output, then the script dies
+#             with Perl's default die message (bare die).
+sub error {
+    my ($message) = @_;
+    print "Error: $message.\n";
+    die;
+}
+
+# Print a debug trace when the global $Debug level is high enough.
+# Parameters: $_[0] = text to trace
+#             $_[1] = level of this trace (a false or missing value means 1)
+# Output:     "DEBUG <level> - <epoch time> : <text>" on standard output. When the
+#             GATEWAY_INTERFACE environment variable is not empty, a leading space is
+#             expanded and "<br>" is appended so the trace renders in a browser.
+# Return:     always 0
+sub debug {
+    my ($text, $level) = @_;
+    $level = $level || 1;
+    return 0 unless $Debug >= $level;
+    if ($ENV{"GATEWAY_INTERFACE"} ne "") {
+        $text =~ s/^ /&nbsp&nbsp /;
+        $text .= "<br>";
+    }
+    print "DEBUG $level - ".time." : $text\n";
+    return 0;
+}
+
+# Tell whether a host must be left unresolved.
+# Parameters: $_[0] = host/IP string to test
+# Return:     1 as soon as one pattern of @SkipDNSLookupFor matches the host
+#             (case-insensitive regex match), 0 when none matches.
+sub SkipDNSLookup {
+    my $candidate = $_[0];
+    foreach my $pattern (@SkipDNSLookupFor) {
+        return 1 if $candidate =~ /$pattern/i;
+    }
+    return 0;   # Not in @SkipDNSLookupFor
+}
+
+
+
+#-------------------------------------------------------
+# MAIN
+#-------------------------------------------------------
+# Collect the log files to merge: every leading argument up to (not including) the
+# first one starting with "-". They are stored in %LogFile keyed by position (1..n).
+my $cpt=1;
+foreach my $arg (@ARGV) {
+    if ($arg =~ /^-/) { last; }
+    $LogFile{$cpt}=$arg;
+    $cpt++;
+}
+
+# Options are searched in the whole command line (each argument followed by a space).
+# Note that "dnslookup" and "debug=" are matched anywhere, file names included.
+$QueryString=join("", map { "$_ " } @ARGV);
+if ($QueryString =~ /debug=/i) {
+    # The value is extracted case-insensitively too, so that -DEBUG=2 works like -debug=2
+    $Debug=$QueryString; $Debug =~ s/.*debug=//i; $Debug =~ s/&.*//; $Debug =~ s/ .*//;
+}
+if ($QueryString =~ /dnslookup/i) { $DNSLookup=1; }
+if ($QueryString =~ /-showsteps/i) { $ShowSteps=1; }
+
+# Split $0 into directory ($DIR), program name ($PROG) and extension ($Extension).
+# $Extension is only taken from $1 when the substitution really matched, otherwise $1
+# would still hold the whole program name captured by the previous regex.
+($DIR=$0) =~ s/([^\/\\]*)$//;
+$PROG=$1;
+if ($PROG =~ s/\.([^\.]*)$//) { $Extension=$1; } else { $Extension=""; }
+
+# Without any log file on the command line, show the usage and stop
+if (scalar keys %LogFile == 0) {
+    my $progfile = ($Extension ne "") ? "$PROG.$Extension" : $PROG;
+    print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
+    print "$PROG is a log file merger and fast reverse DNS resolver.\n";
+    print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software\n";
+    print "distributed with a GNU General Public License (See COPYING.txt file).\n";
+    print "\n";
+    print "Syntax: $progfile file1 ... filen [-dnslookup]\n";
+    print "  This runs $PROG in command line to open one or several web server\n";
+    print "  log files to merge them (sorted on date) and/or to make a reverse DNS lookup.\n";
+    print "  The result log file is sent on standard output.\n";
+    print "Option:\n";
+    print "  -dnslookup  make a reverse DNS lookup on IP adresses (not done by default).\n";
+    print "  -showsteps  to add benchmark informations every $NbOfLinesForBenchmark lines processed\n";
+    print "\n";
+    print "Now supports/detects:\n";
+    print "  Automatic detection of log format\n";
+    # NOTE(review): $LookupPool is never assigned in this script, so the next line
+    # prints "( parallel request)"; lookups below are done one at a time.
+    print "  Multithreaded reverse DNS lookup ($LookupPool parallel request)\n";
+    print "  No need of extra Perl library\n";
+    print "New versions and FAQ at http://awstats.sourceforge.net\n";
+    exit 0;
+}
+
+# Get current time, split into zero-padded two-digit fields and a four-digit year.
+# localtime() returns the year as an offset from 1900 (2001 => 101), so 1900 is
+# always added; the month is 0-based and is shifted to 01..12.
+$nowtime=time;
+($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
+$nowyear+=1900;
+$nowsmallyear=sprintf("%02d",$nowyear % 100);
+$nowmonth=sprintf("%02d",$nowmonth+1);
+$nowday=sprintf("%02d",$nowday);
+$nowhour=sprintf("%02d",$nowhour);
+$nowmin=sprintf("%02d",$nowmin);
+$nowsec=sprintf("%02d",$nowsec);
+# Get tomorrow time (will be used to discard some record with corrupted date (future date))
+($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
+$tomorrowyear+=1900;
+$tomorrowsmallyear=sprintf("%02d",$tomorrowyear % 100);
+$tomorrowmonth=sprintf("%02d",$tomorrowmonth+1);
+$tomorrowday=sprintf("%02d",$tomorrowday);
+$tomorrowhour=sprintf("%02d",$tomorrowhour);
+$tomorrowmin=sprintf("%02d",$tomorrowmin);
+$tomorrowsec=sprintf("%02d",$tomorrowsec);
+# Same YYYYMMDDHHMMSS layout as the record timestamps built in the merge loop
+$timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;
+
+# Init other parameters
+# "use" is executed at compile time whatever block it is written in, so Socket
+# (needed for AF_INET in the reverse lookup) is always loaded.
+use Socket;
+if ($ENV{"GATEWAY_INTERFACE"} ne "") {
+    $DirCgi="";
+}
+if ($DirCgi ne "" && $DirCgi !~ /\/$/ && $DirCgi !~ /\\$/) {
+    $DirCgi .= "/";         # Make sure a non-empty DirCgi ends with a separator
+}
+if ($DirData eq "" || $DirData eq ".") {
+    $DirData=$DIR;          # Not defined or set to ".": use the directory of the script
+}
+if ($DirData eq "") {
+    $DirData=".";           # Directory of the script unknown: fall back to "."
+}
+$DirData =~ s/\/$//;
+$NewDNSLookup=$DNSLookup;
+
+# Month labels "01".."12" taken from @message entries 60..71
+%monthlib = map { (sprintf("%02d",$_), "$message[59+$_]") } 1..12;
+
+# monthnum must be in english because it's used to translate log date in apache log files which are always in english
+# Each month is registered capitalized ("Jan") and in lower case ("jan").
+%monthnum = ();
+{
+    my $monthindex=0;
+    foreach my $monthname (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
+        my $twodigits=sprintf("%02d",++$monthindex);
+        $monthnum{$monthname}=$twodigits;
+        $monthnum{lc $monthname}=$twodigits;
+    }
+}
+
+#------------------------------------------
+# PROCESSING CURRENT LOG(s)
+#------------------------------------------
+&debug("Start of processing ".(scalar keys %LogFile)." log file(s)");
+
+# Per-file state, all keyed by the log file number
+%LogFileToDo=();            # Files that still have records to deliver
+%NowNewLinePhase=();
+%NbOfLinesRead=();
+%NbOfLinesCorrupted=();
+# Global counters
+$NbOfNewLinesProcessed=0;
+$NbOfNewLinesCorrupted=0;
+$logfilechosen=0;           # 0 = no record delivered yet
+$starttime=time();
+
+# Open all log files. File number N is read through the handle named "LOGN";
+# any file that cannot be opened aborts the run through error().
+foreach my $filenumber (keys %LogFile) {
+    my $filename=$LogFile{$filenumber};
+    &debug("Open log file number $filenumber: \"$filename\"");
+    unless (open("LOG$filenumber","$filename")) {
+        error("Couldn't open server log file \"$filename\" : $!");
+    }
+    $LogFileToDo{$filenumber}=$filename;
+}
+
+# Merge loop. Each pass:
+#   1) reads the next dated record of every open file (first pass) or only of the file
+#      whose record was delivered by the previous pass,
+#   2) chooses the pending record with the smallest timestamp,
+#   3) optionally replaces its first IP address by the resolved host name,
+#   4) prints it on standard output.
+# The loop ends when no file has a pending record any more.
+while (1 == 1)
+{
+    # BEGIN Read new record (for each log file or only for log file with record just processed)
+    #------------------------------------------------------------------------------------------
+    foreach $logfilenb (keys %LogFileToDo) {
+        if (($logfilechosen == 0) || ($logfilechosen == $logfilenb)) {
+            &debug("Search next record in file number $logfilenb",3);
+            # Read chosen log file until we found a record with good date or reaching end of file
+            while (1 == 1) {
+                my $LOG="LOG$logfilenb"; $_=<$LOG>;     # Read new line
+                if (! defined $_) {                     # No more records in log file number $logfilenb
+                    &debug(" No more records in file number $logfilenb",2);
+                    delete $LogFileToDo{$logfilenb};
+                    last;                               # We have all the new records for each other files, we stop here
+                }
+
+                chomp $_; s/\r//;
+
+                if (/^#/) { next; }                     # Ignore comment lines (ISS writes such comments)
+                if (/^!!/) { next; }                    # Ignore comment lines (Webstar writes such comments)
+                if (/^$/) { next; }                     # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
+
+                $NbOfLinesRead{$logfilenb}++;
+
+                # Check filters
+                #----------------------------------------------------------------------
+                # Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
+                $linerecord{$logfilenb}=$_;
+                my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
+                if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
+                if ($_ =~ /\[(\d\d)\/(.*)\/(\d\d\d\d):(\d\d):(\d\d):(\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
+                if ($monthnum{$month}) { $month=$monthnum{$month}; }    # Change lib month in num month if necessary
+
+                # Create $timeconnexion like YYYYMMDDHHMMSS
+                $timeconnexion{$logfilenb}=int("$year$month$day$hour$minute$second");
+                if ($timeconnexion{$logfilenb}<10000000000000) {
+                    # No date recognised: the value has fewer than 14 digits
+                    &debug(" This record is corrupted (no date found)",3);
+                    $NbOfLinesCorrupted{$logfilenb}++;
+                    next;
+                }
+                &debug(" This is next record for file $logfilenb : timeconnexion=$timeconnexion{$logfilenb}",3);
+                last;
+            }
+        }
+    }
+    # END Read new lines for each log file. After this, following var are filled
+    # $timeconnexion{$logfilenb} and $linerecord{$logfilenb}
+
+    # We choose which record of which log file to process (the oldest one)
+    &debug("Choose of wich record of which log file to process",3);
+    $logfilechosen=-1;
+    my $timeref="99999999999999";
+    foreach my $logfilenb (keys %LogFileToDo) {
+        &debug(" timeconnexion for file $logfilenb is $timeconnexion{$logfilenb}",4);
+        if ($timeconnexion{$logfilenb} < $timeref) { $logfilechosen=$logfilenb; $timeref=$timeconnexion{$logfilenb} }
+    }
+    if ($logfilechosen <= 0) { last; }                  # No more record to process
+    # Record is chosen
+    &debug(" We choosed to analyze record of file number $logfilechosen",3);
+    &debug(" Record is $linerecord{$logfilechosen}",3);
+
+    # Record is approved. We found a new line to process in file number $logfilechosen
+    #----------------------------------------------------------------------------------
+    $NbOfNewLinesProcessed++;
+    if (($ShowSteps) && ($NbOfNewLinesProcessed % $NbOfLinesForBenchmark == 0)) {
+        my $elapsed=time()-$starttime;
+        # time() has a one second resolution: count at least one second so that a
+        # fast run does not die on a division by zero
+        my $rate=$NbOfNewLinesProcessed/($elapsed > 0 ? $elapsed : 1);
+        print STDERR "$NbOfNewLinesProcessed lines processed ($elapsed seconds, $rate lines/seconds)\n";
+    }
+
+    # Analyze: IP-address
+    #--------------------
+    if ($NewDNSLookup) {
+        # Work on the chosen record itself ($_ only holds the last line read, which
+        # may belong to another file), and only use $1 when the match succeeded.
+        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) {
+            $Host=$1;
+            $new=$TmpHashDNSLookup{$Host};  # TmpHashDNSLookup is a temporary hash table to increase speed
+            if (!$new) {                    # if $new undefined, $Host not yet resolved
+                &debug(" Start of reverse DNS lookup for $Host",4);
+                if ($MyDNSTable{$Host}) {
+                    &debug(" End of reverse DNS lookup, found resolution of $Host in local MyDNSTable",4);
+                    $new = $MyDNSTable{$Host};
+                }
+                else {
+                    if (&SkipDNSLookup($Host)) {
+                        &debug(" (Skipping this DNS lookup at user request.)",4);
+                    }
+                    else {
+                        $new=gethostbyaddr(pack("C4",split(/\./,$Host)),AF_INET);   # This is very slow, may took 20 seconds
+                    }
+                    &debug(" End of reverse DNS lookup for $Host",4);
+                }
+                if ($new eq "") { $new="ip"; }  # "ip" marks an address that could not (or must not) be resolved
+                $TmpHashDNSLookup{$Host}=$new;
+            }
+            # Here $Host is still xxx.xxx.xxx.xxx and $new is name or "ip" if reverse failed)
+            # \Q...\E keeps the dots of the address literal in the pattern
+            if ($new ne "ip") { $linerecord{$logfilechosen} =~ s/\Q$Host\E/$new/; }
+        }
+        else {
+            &debug(" No IP adresses found in this record.",3);
+        }
+    }
+
+    # Print record (with its address resolved when DNS lookup is enabled)
+    print "$linerecord{$logfilechosen}\n";
+
+    # End of processing all new records.
+}
+&debug("End of processing log file(s)");
+
+
+# Close all log files (file number N was opened on the handle named "LOGN")
+foreach my $filenumber (keys %LogFile) {
+    my $handlename="LOG$filenumber";
+    &debug("Close log file number $filenumber");
+    close($handlename);
+}
+
+
+0;     # Do not remove this line