#
KeepBackupOfHistoricFiles=0
-# Set your primary language.
-# Possible value:
-# Bosnian=ba, Chinese (Taiwan)=tw, Chinese (Traditional)=cn, Czech=cz,
-# Danish=dk, Dutch=nl, English=en, French=fr, German=de, Greek=gr,
-# Hungarian=hu, Indonesian=id, Italian=it, Japanese=jp, Korean=kr,
-# Latvian=lv, Norwegian (Nynorsk)=nn, Norwegian (Bokmal)=nb, Polish=pl,
-# Portuguese=pt, Portuguese (Brazilian)=br, Romanian=ro, Russian=ru,
-# Spanish=es, Swedish=se, Turkish=tr, Ukrainian=ua
-# Default: en
-#
-Lang="en"
-
-# Set the location of language files.
-# Example: "/opt/awstats/lang"
-# Default: "./lang" (means lang directory is in same location than awstats.pl)
-#
-DirLang="./lang"
-
# Index page name for your web server.
# Example: "default.htm"
# Default: "index.html"
# Note: This parameter is not case sensitive.
# Note: Use space between each value and do not remove default values.
# Note: xxx$ means URL ending with xxx.
-# Example: ".css$ .js$ .class$ robots.txt$ badpage.html /~"
-# Default: ".css$ .js$ .class$ robots.txt$"
+# Example: "robots.txt$ favicon.ico$ badpage.html /~"
+# Default: "robots.txt$ favicon.ico$"
#
-SkipFiles=".css$ .js$ .class$ robots.txt$"
+SkipFiles="robots.txt$ favicon.ico$"
# Include in stats, only accesses to URLs that match one of following entries.
# For example, if you want AWStats to filter access to keep only stats that
# Note: If you want to exclude your own URLs from stats (No Pages and no Hits
# reported), you should use SkipFiles parameter instead.
# Example: ""
-# Example: "gif jpg jpeg png bmp zip arj gz z wav mp3 wma mpg"
-# Default: "gif jpg jpeg png bmp"
+# Example: "css js class gif jpg jpeg png bmp zip arj gz z wav mp3 wma mpg"
+# Default: "css js class gif jpg jpeg png bmp"
#
-NotPageList="gif jpg jpeg png bmp"
+NotPageList="css js class gif jpg jpeg png bmp"
# By default, AWStats considers that records found in log file are successfull
# hits if HTTP code returned by server is a valid HTTP code (200 and 304).
#
NbOfLinesForCorruptedLog=50
-# Choose if you want week to start on sunday or monday
-# Possible values:
-# 0 - Week start on sunday
-# 1 - Week start on monday
-# Default: 1
-#
-FirstDayOfWeek=1
-
-# This value can be used to choose maximum number of lines shown for each
-# particular reporting.
-#
-# Stats by domains
-MaxNbOfDomain = 25
-# Stats by hosts
-MaxNbOfHostsShown = 25
-MinHitHost = 1
-# Stats by authenticated users
-MaxNbOfLoginShown = 10
-MinHitLogin = 1
-# Stats by robots
-MaxNbOfRobotShown = 25
-MinHitRobot = 1
-# Stats by pages
-MaxNbOfPageShown = 25
-MinHitFile = 1
-# Stats by referers
-MaxNbOfRefererShown = 25
-MinHitRefer = 1
-# Stats for keywords
-MaxNbOfKeywordsShown = 25
-MinHitKeyword = 1
-
# Search engines keywords reported are full search string or separate keywords
# Possible values:
# 0 - Search keywords reported are full search string (ie: "town maps")
#
Expires=0
+
+#-----------------------------------------------------------------------------
+# OPTIONAL ACCURACY SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# You can change value for following option to increase AWStats capabilities
+# (but this reduce AWStats speed).
+# Possible values: 0, 1 or 2
+# Default: 1
+#
+LevelForRobotsDetection=1 # 0 will increase AWStats speed by 1%.
+LevelForBrowsersDetection=1 # 0 disables Browsers detection. No speed gain.
+LevelForOSDetection=1 # 0 disables OS detection. No speed gain.
+LevelForRefererAnalyze=1 # 0 will increase AWStats speed by 6%.
+
+
+
+#-----------------------------------------------------------------------------
+# OPTIONAL APPEARANCE SETUP SECTION (Not required but increase AWStats features)
+#-----------------------------------------------------------------------------
+
+# Set your primary language.
+# Possible value:
+# Bosnian=ba, Chinese (Taiwan)=tw, Chinese (Traditional)=cn, Czech=cz,
+# Danish=dk, Dutch=nl, English=en, French=fr, German=de, Greek=gr,
+# Hungarian=hu, Indonesian=id, Italian=it, Japanese=jp, Korean=kr,
+# Latvian=lv, Norwegian (Nynorsk)=nn, Norwegian (Bokmal)=nb, Polish=pl,
+# Portuguese=pt, Portuguese (Brazilian)=br, Romanian=ro, Russian=ru,
+# Spanish=es, Swedish=se, Turkish=tr, Ukrainian=ua
+# Default: en
+#
+Lang="en"
+
+# Set the location of language files.
+# Example: "/opt/awstats/lang"
+# Default: "./lang" (means lang directory is in same location than awstats.pl)
+#
+DirLang="./lang"
+
# You choose here which summary report you want to see in the main page.
# Whatever is your setup here, all detailed reports are still available.
# So if you setup to 1 only ShowHeader, ShowMenu and ShowMonthDayStats, all
-# links in menu will point to detailed views.
+# links in menu will still point to detailed views.
# Possible values: 1 or 0
#
ShowHeader=1 # Show AWStats head title and icon
ShowKeywordsStats=1 # Not yet available
ShowHTTPErrorsStats=1
+# This value can be used to choose maximum number of lines shown for each
+# particular reporting.
+#
+# Stats by domains
+MaxNbOfDomain = 25
+# Stats by hosts
+MaxNbOfHostsShown = 25
+MinHitHost = 1
+# Stats by authenticated users
+MaxNbOfLoginShown = 10
+MinHitLogin = 1
+# Stats by robots
+MaxNbOfRobotShown = 25
+MinHitRobot = 1
+# Stats by pages
+MaxNbOfPageShown = 25
+MinHitFile = 1
+# Stats by referers
+MaxNbOfRefererShown = 25
+MinHitRefer = 1
+# Stats for keywords
+MaxNbOfKeywordsShown = 25
+MinHitKeyword = 1
+
+# Choose if you want week to start on sunday or monday
+# Possible values:
+# 0 - Week start on sunday
+# 1 - Week start on monday
+# Default: 1
+#
+FirstDayOfWeek=1
+
+# This parameter ask your browser to open detailed reports into a different
+# window than the main page.
+# Possible values: 1 or 0
+# Default: 1
+#
+DetailedReportsOnNewWindows=1
+
# List of visible flags with link to other language translations.
# If you don't want any flag link, set ShowFlagLinks to "".
# This parameter is used only if ShowHeader is set to 1.
# Default: 72
#
MaxLengthOfURL=72
-
-# This parameter ask your browser to open detailed reports into a different
-# window than the main page.
-# Possible values: 1 or 0
-# Default: 1
-#
-DetailedReportsOnNewWindows=1
# You can put here HTML code that will be added at the top of AWStats reports.
# Default: ""
#
HTMLEndSection=""
-# Value of maximum bar width/heigth for horizontal/vertical graphics bar
-# Default: 260/180
-#
-BarWidth = 260
-BarHeight = 180
-
# You can set Logo and LogoLink to use your own logo.
# Logo must be the name of image file (must be in $DirIcons/other directory).
# LogoLink is the expected URL when clicking on Logo.
Logo="awstats_logo1.png"
LogoLink="http://awstats.sourceforge.net"
+# Value of maximum bar width/heigth for horizontal/vertical graphics bar
+# Default: 260/180
+#
+BarWidth = 260
+BarHeight = 180
+
# You can ask AWStats to use a particular CSS (Cascading Style Sheet) to
-# change its look.
+# change its look (Feature not yet available).
# Example: "/css/awstats.css"
# Default: ""
#
#use diagnostics;
# Uncomment following line and a line into GetDelaySinceStart function to get
# miliseconds time in showsteps option
-#use Time::HiRes qw( gettimeofday );
+use Time::HiRes qw( gettimeofday );
#-------------------------------------------------------
# Defines
#-------------------------------------------------------
-my $VERSION="4.0 (build 45)";
+my $VERSION="4.0 (build 47)";
# ---------- Init variables -------
my $Debug=0;
my $DEBUGFORCED = 0; # Force debug level to log lesser level into debug.log file (Keep this value to 0)
my $MaxRowsInHTMLOutput = 1000; # Max number of rows for not limited HTML arrays
my $VisitTimeOut = 10000; # Laps of time to consider a page load as a new visit. 10000 = one hour (Default = 10000)
-my $FullHostName = 1; # 1 = Use name.domain.zone to refer host clients, 0 = all hosts in same domain.zone are one host (Default = 1, 0 never tested)
+#my $FullHostName = 1; # 1 = Use name.domain.zone to refer host clients, 0 = all hosts in same domain.zone are one host (Default = 1, 0 never tested)
my $NbOfLinesForBenchmark=5000;
my $ShowBackLink = 1;
my $WIDTH = 600;
my $tomorrowsec = my $tomorrowmin = my $tomorrowhour = my $tomorrowday = my $tomorrowmonth = my $tomorrowyear = my $tomorrowwday = 0;
my ($BarHeight,$BarWidth,$DebugResetDone,$Expires,
$CreateDirDataIfNotExists, $KeepBackupOfHistoricFiles, $MaxLengthOfURL,
-$NbOfLinesRead, $NbOfNewLinesProcessed, $NbOfLinesCorrupted, $NowNewLinePhase,
$MaxNbOfDomain, $MaxNbOfHostsShown, $MaxNbOfKeywordsShown, $MaxNbOfLoginShown,
$MaxNbOfPageShown, $MaxNbOfRefererShown, $MaxNbOfRobotShown,
$MinHitFile, $MinHitHost, $MinHitKeyword,
$MinHitLogin, $MinHitRefer, $MinHitRobot,
-$NbOfLinesForCorruptedLog, $PurgeLogFile,
+$NbOfLinesRead, $NbOfLinesDropped, $NbOfLinesEmpty, $NbOfLinesCorrupted, $NbOfNewLinesProcessed,
+$NowNewLinePhase, $NbOfLinesForCorruptedLog, $PurgeLogFile,
$ShowAuthenticatedUsers, $ShowCompressionStats, $ShowFileSizesStats,
-$ShowCorrupted, $SplitSearchString, $StartSeconds, $StartMicroseconds,
+$ShowDropped, $ShowCorrupted, $ShowUnknownOrigin, $SplitSearchString, $StartSeconds, $StartMicroseconds,
$StaticLinks, $UpdateStats, $URLWithQuery)=
-(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
my ($AllowToUpdateStatsFromBrowser, $ArchiveLogRecords, $DetailedReportsOnNewWindows,
$FirstDayOfWeek, $SaveDatabaseFilesWithPermissionsForEveryone,
+$LevelForRobotsDetection, $LevelForBrowsersDetection, $LevelForOSDetection,
+$LevelForSearchEnginesDetection, $LevelForKeywordsDetection, $LevelForRefererAnalyze,
$ShowHeader, $ShowMenu, $ShowMonthDayStats, $ShowDaysOfWeekStats,
$ShowHoursStats, $ShowDomainsStats, $ShowHostsStats,
$ShowRobotsStats, $ShowPagesStats, $ShowFileTypesStats,
$ShowBrowsersStats, $ShowOSStats, $ShowOriginStats, $ShowKeyphrasesStats,
$ShowKeywordsStats, $ShowHTTPErrorsStats,
$ShowFlagLinks, $ShowLinksOnUrl, $WarningMessages)=
-(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
+(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
my ($ArchiveFileName, $DayRequired, $DefaultFile, $FileConfig, $FileSuffix,
$HTMLHeadSection, $HTMLEndSection, $HTMLOutput, $Host,
$LastUpdate, $LogFile, $LogFormat, $LogFormatString, $Logo, $LogoLink,
# ---------- Init arrays --------
my @Message=();
my @HostAliases=();
-my @NotPageList=();
my @ValidHTTPCodes=();
my @OnlyFiles = my @SkipDNSLookupFor = my @SkipFiles = my @SkipHosts = ();
my @DOWIndex=();
my %DayBytes = my %DayHits = my %DayPages = my %DayUnique = my %DayVisits = ();
my %FirstTime = my %LastTime = my %LastLine = my %LastUpdate = ();
my %MonthBytes = my %MonthHits = my %MonthHostsKnown = my %MonthHostsUnknown = my %MonthPages = my %MonthUnique = my %MonthVisits = ();
+my %NotPageList=();
my %monthlib = my %monthnum = ();
my %HistoryFileAlreadyRead=();
#my %BrowsersHereAreGrabbers = my %BrowsersHashIcon = my %BrowsersHashIDLib = ();
foreach my $elem (@felter) { push @HostAliases,$elem; }
next;
}
- # Read optional section
+ # Read optional setup section
if ($param =~ /^CreateDirDataIfNotExists/) { $CreateDirDataIfNotExists=$value; next; }
if ($param =~ /^SaveDatabaseFilesWithPermissionsForEveryone/) { $SaveDatabaseFilesWithPermissionsForEveryone=$value; next; }
if ($param =~ /^PurgeLogFile/) { $PurgeLogFile=$value; next; }
if ($param =~ /^ArchiveLogRecords/) { $ArchiveLogRecords=$value; next; }
if ($param =~ /^KeepBackupOfHistoricFiles/) { $KeepBackupOfHistoricFiles=$value; next; }
- if ($param =~ /^Lang/) { $Lang=$value; next; }
- if ($param =~ /^DirLang/) { $DirLang=$value; next; }
if ($param =~ /^DefaultFile/) { $DefaultFile=$value; next; }
if ($param =~ /^SkipHosts/) {
$value =~ s/\\\./\./g; $value =~ s/([^\\])\./$1\\\./g; $value =~ s/^\./\\\./; # Replace . into \.
}
if ($param =~ /^NotPageList/) {
my @felter=split(/\s+/,$value);
- foreach my $elem (@felter) { push @NotPageList,$elem; }
+ foreach my $elem (@felter) { $NotPageList{$elem}=1; }
$foundNotPageList=1;
next;
}
$foundValidHTTPCodes=1;
next;
}
- if ($param =~ /^URLWithQuery/) { $URLWithQuery=$value; next; }
+ if ($param =~ /^URLWithQuery/) { $URLWithQuery=$value; next; }
if ($param =~ /^WarningMessages/) { $WarningMessages=$value; next; }
if ($param =~ /^NbOfLinesForCorruptedLog/) { $NbOfLinesForCorruptedLog=$value; next; }
- if ($param =~ /^FirstDayOfWeek/) { $FirstDayOfWeek=$value; next; }
- if ($param =~ /^MaxNbOfDomain/) { $MaxNbOfDomain=$value; next; }
- if ($param =~ /^MaxNbOfHostsShown/) { $MaxNbOfHostsShown=$value; next; }
- if ($param =~ /^MinHitHost/) { $MinHitHost=$value; next; }
- if ($param =~ /^MaxNbOfRobotShown/) { $MaxNbOfRobotShown=$value; next; }
- if ($param =~ /^MinHitRobot/) { $MinHitRobot=$value; next; }
- if ($param =~ /^MaxNbOfLoginShown/) { $MaxNbOfLoginShown=$value; next; }
- if ($param =~ /^MinHitLogin/) { $MinHitLogin=$value; next; }
- if ($param =~ /^MaxNbOfPageShown/) { $MaxNbOfPageShown=$value; next; }
- if ($param =~ /^MinHitFile/) { $MinHitFile=$value; next; }
- if ($param =~ /^MaxNbOfRefererShown/) { $MaxNbOfRefererShown=$value; next; }
- if ($param =~ /^MinHitRefer/) { $MinHitRefer=$value; next; }
- if ($param =~ /^MaxNbOfKeywordsShown/) { $MaxNbOfKeywordsShown=$value; next; }
- if ($param =~ /^MinHitKeyword/) { $MinHitKeyword=$value; next; }
if ($param =~ /^SplitSearchString/) { $SplitSearchString=$value; next; }
if ($param =~ /^Expires/) { $Expires=$value; next; }
+ # Read optional accuracy setup section
+ if ($param =~ /^LevelForRobotsDetection/) { $LevelForRobotsDetection=$value; next; } # Not used yet
+ if ($param =~ /^LevelForBrowsersDetection/) { $LevelForBrowsersDetection=$value; next; } # Not used yet
+ if ($param =~ /^LevelForOSDetection/) { $LevelForOSDetection=$value; next; } # Not used yet
+ if ($param =~ /^LevelForSearchEnginesDetection/) { $LevelForSearchEnginesDetection=$value; next; } # Not used yet
+ if ($param =~ /^LevelForRefererAnalyze/) { $LevelForRefererAnalyze=$value; next; } # Not used yet
+ if ($param =~ /^LevelForKeywordsDetection/) { $LevelForKeywordsDetection=$value; next; } # Not used yet
+ # Read optional appearance setup section
+ if ($param =~ /^Lang/) { $Lang=$value; next; }
+ if ($param =~ /^DirLang/) { $DirLang=$value; next; }
if ($param =~ /^ShowHeader/) { $ShowHeader=$value; next; }
if ($param =~ /^ShowMenu/) { $ShowMenu=$value; next; }
if ($param =~ /^ShowMonthDayStats/) { $ShowMonthDayStats=$value; next; }
if ($param =~ /^ShowKeywordsStats/) { $ShowKeywordsStats=$value; next; }
if ($param =~ /^ShowCompressionStats/) { $ShowCompressionStats=$value; next; }
if ($param =~ /^ShowHTTPErrorsStats/) { $ShowHTTPErrorsStats=$value; next; }
+ if ($param =~ /^FirstDayOfWeek/) { $FirstDayOfWeek=$value; next; }
+ if ($param =~ /^DetailedReportsOnNewWindows/) { $DetailedReportsOnNewWindows=$value; next; }
if ($param =~ /^ShowFlagLinks/) { $ShowFlagLinks=$value; next; }
if ($param =~ /^ShowLinksOnUrl/) { $ShowLinksOnUrl=$value; next; }
if ($param =~ /^MaxLengthOfURL/) { $MaxLengthOfURL=$value; next; }
- if ($param =~ /^MaxRowsInHTMLOutput/) { $MaxRowsInHTMLOutput=$value; next; }
- if ($param =~ /^DetailedReportsOnNewWindows/) { $DetailedReportsOnNewWindows=$value; next; }
+ if ($param =~ /^MaxRowsInHTMLOutput/) { $MaxRowsInHTMLOutput=$value; next; } # Not used yet
+ if ($param =~ /^MaxNbOfDomain/) { $MaxNbOfDomain=$value; next; }
+ if ($param =~ /^MaxNbOfHostsShown/) { $MaxNbOfHostsShown=$value; next; }
+ if ($param =~ /^MinHitHost/) { $MinHitHost=$value; next; }
+ if ($param =~ /^MaxNbOfRobotShown/) { $MaxNbOfRobotShown=$value; next; }
+ if ($param =~ /^MinHitRobot/) { $MinHitRobot=$value; next; }
+ if ($param =~ /^MaxNbOfLoginShown/) { $MaxNbOfLoginShown=$value; next; }
+ if ($param =~ /^MinHitLogin/) { $MinHitLogin=$value; next; }
+ if ($param =~ /^MaxNbOfPageShown/) { $MaxNbOfPageShown=$value; next; }
+ if ($param =~ /^MinHitFile/) { $MinHitFile=$value; next; }
+ if ($param =~ /^MaxNbOfRefererShown/) { $MaxNbOfRefererShown=$value; next; }
+ if ($param =~ /^MinHitRefer/) { $MinHitRefer=$value; next; }
+ if ($param =~ /^MaxNbOfKeywordsShown/) { $MaxNbOfKeywordsShown=$value; next; }
+ if ($param =~ /^MinHitKeyword/) { $MinHitKeyword=$value; next; }
if ($param =~ /^HTMLHeadSection/) { $HTMLHeadSection=$value; next; }
if ($param =~ /^HTMLEndSection/) { $HTMLEndSection=$value; next; }
if ($param =~ /^BarWidth/) { $BarWidth=$value; next; }
}
close CONFIG;
# If parameter NotPageList not found, init for backward compatibility
- if (! $foundNotPageList) {
- push @NotPageList,"gif"; push @NotPageList,"jpg"; push @NotPageList,"jpeg"; push @NotPageList,"png"; push @NotPageList,"bmp";
- }
+ $NotPageList{"gif"}=$NotPageList{"jpg"}=$NotPageList{"jpeg"}=$NotPageList{"png"}=$NotPageList{"bmp"}=1;
+
# If ValidHTTPCodes not found, init for backward compatibility
if (! $foundValidHTTPCodes) {
push @ValidHTTPCodes,"200"; push @ValidHTTPCodes,"304";
}
- debug(" NotPageList ".@NotPageList);
+ debug(" NotPageList ".(scalar keys %NotPageList));
debug(" ValidHTTPCodes ".@ValidHTTPCodes);
}
if ($LogFormat =~ /^[\d]$/ && $LogFormat !~ /[1-5]/) { error("Error: LogFormat parameter is wrong. Value is '$LogFormat' (should be 1,2,3,4,5 or a 'personalised AWtats log format string')"); }
if ($DNSLookup !~ /[0-1]/) { error("Error: DNSLookup parameter is wrong. Value is '$DNSLookup' (should be 0 or 1)"); }
if ($AllowToUpdateStatsFromBrowser !~ /[0-1]/) { $AllowToUpdateStatsFromBrowser=0; }
- # Optional section
+ # Optional setup section
if ($CreateDirDataIfNotExists !~ /[0-1]/) { $CreateDirDataIfNotExists=0; }
if ($SaveDatabaseFilesWithPermissionsForEveryone !~ /[0-1]/) { $SaveDatabaseFilesWithPermissionsForEveryone=1; }
if ($PurgeLogFile !~ /[0-1]/) { $PurgeLogFile=0; }
if ($URLWithQuery !~ /[0-1]/) { $URLWithQuery=0; }
if ($WarningMessages !~ /[0-1]/) { $WarningMessages=1; }
if ($NbOfLinesForCorruptedLog !~ /[\d]+/ || $NbOfLinesForCorruptedLog<1) { $NbOfLinesForCorruptedLog=50; }
- if ($FirstDayOfWeek !~ /[0-1]/) { $FirstDayOfWeek=1; }
- if ($MaxNbOfDomain !~ /^[\d]+/ || $MaxNbOfDomain<1) { $MaxNbOfDomain=25; }
- if ($MaxNbOfHostsShown !~ /^[\d]+/ || $MaxNbOfHostsShown<1) { $MaxNbOfHostsShown=25; }
- if ($MinHitHost !~ /^[\d]+/ || $MinHitHost<1) { $MinHitHost=1; }
- if ($MaxNbOfLoginShown !~ /^[\d]+/ || $MaxNbOfLoginShown<1) { $MaxNbOfLoginShown=10; }
- if ($MinHitLogin !~ /^[\d]+/ || $MinHitLogin<1) { $MinHitLogin=1; }
- if ($MaxNbOfRobotShown !~ /^[\d]+/ || $MaxNbOfRobotShown<1) { $MaxNbOfRobotShown=25; }
- if ($MinHitRobot !~ /^[\d]+/ || $MinHitRobot<1) { $MinHitRobot=1; }
- if ($MaxNbOfPageShown !~ /^[\d]+/ || $MaxNbOfPageShown<1) { $MaxNbOfPageShown=25; }
- if ($MinHitFile !~ /^[\d]+/ || $MinHitFile<1) { $MinHitFile=1; }
- if ($MaxNbOfRefererShown !~ /^[\d]+/ || $MaxNbOfRefererShown<1) { $MaxNbOfRefererShown=25; }
- if ($MinHitRefer !~ /^[\d]+/ || $MinHitRefer<1) { $MinHitRefer=1; }
- if ($MaxNbOfKeywordsShown !~ /^[\d]+/ || $MaxNbOfKeywordsShown<1) { $MaxNbOfKeywordsShown=25; }
- if ($MinHitKeyword !~ /^[\d]+/ || $MinHitKeyword<1) { $MinHitKeyword=1; }
- if ($MaxRowsInHTMLOutput !~ /^[\d]+/ || $MaxRowsInHTMLOutput<1) { $MaxRowsInHTMLOutput=1000; }
if ($SplitSearchString !~ /[0-1]/) { $SplitSearchString=0; }
if ($Expires !~ /^[\d]+/) { $Expires=0; }
+ # Optional accuracy setup section
+ if ($LevelForRobotsDetection !~ /^[\d]+/) { $LevelForRobotsDetection=1; }
+ if ($LevelForBrowsersDetection !~ /^[\d]+/) { $LevelForBrowsersDetection=1; }
+ if ($LevelForOSDetection !~ /^[\d]+/) { $LevelForOSDetection=1; }
+ if ($LevelForSearchEnginesDetection !~ /^[\d]+/) { $LevelForSearchEnginesDetection=1; }
+ if ($LevelForRefererAnalyze !~ /^[\d]+/) { $LevelForRefererAnalyze=1; }
+ if ($LevelForKeywordsDetection !~ /^[\d]+/) { $LevelForKeywordsDetection=1; }
+ # Optional appearance setup section
if ($ShowHeader !~ /[0-1]/) { $ShowHeader=1; }
if ($ShowMenu !~ /[0-1]/) { $ShowMenu=1; }
if ($ShowMonthDayStats !~ /[0-1]/) { $ShowMonthDayStats=1; }
if ($ShowCompressionStats !~ /[0-1]/) { $ShowCompressionStats=1; }
if ($ShowHTTPErrorsStats !~ /[0-1]/) { $ShowHTTPErrorsStats=1; }
if ($ShowLinksOnUrl !~ /[0-1]/) { $ShowLinksOnUrl=1; }
+ if ($MaxNbOfDomain !~ /^[\d]+/ || $MaxNbOfDomain<1) { $MaxNbOfDomain=25; }
+ if ($MaxNbOfHostsShown !~ /^[\d]+/ || $MaxNbOfHostsShown<1) { $MaxNbOfHostsShown=25; }
+ if ($MinHitHost !~ /^[\d]+/ || $MinHitHost<1) { $MinHitHost=1; }
+ if ($MaxNbOfLoginShown !~ /^[\d]+/ || $MaxNbOfLoginShown<1) { $MaxNbOfLoginShown=10; }
+ if ($MinHitLogin !~ /^[\d]+/ || $MinHitLogin<1) { $MinHitLogin=1; }
+ if ($MaxNbOfRobotShown !~ /^[\d]+/ || $MaxNbOfRobotShown<1) { $MaxNbOfRobotShown=25; }
+ if ($MinHitRobot !~ /^[\d]+/ || $MinHitRobot<1) { $MinHitRobot=1; }
+ if ($MaxNbOfPageShown !~ /^[\d]+/ || $MaxNbOfPageShown<1) { $MaxNbOfPageShown=25; }
+ if ($MinHitFile !~ /^[\d]+/ || $MinHitFile<1) { $MinHitFile=1; }
+ if ($MaxNbOfRefererShown !~ /^[\d]+/ || $MaxNbOfRefererShown<1) { $MaxNbOfRefererShown=25; }
+ if ($MinHitRefer !~ /^[\d]+/ || $MinHitRefer<1) { $MinHitRefer=1; }
+ if ($MaxNbOfKeywordsShown !~ /^[\d]+/ || $MaxNbOfKeywordsShown<1) { $MaxNbOfKeywordsShown=25; }
+ if ($MinHitKeyword !~ /^[\d]+/ || $MinHitKeyword<1) { $MinHitKeyword=1; }
+ if ($MaxRowsInHTMLOutput !~ /^[\d]+/ || $MaxRowsInHTMLOutput<1) { $MaxRowsInHTMLOutput=1000; }
if ($MaxLengthOfURL !~ /^[\d+]/ || $MaxLengthOfURL<1) { $MaxLengthOfURL=72; }
+ if ($FirstDayOfWeek !~ /[0-1]/) { $FirstDayOfWeek=1; }
if ($DetailedReportsOnNewWindows !~ /[0-1]/) { $DetailedReportsOnNewWindows=1; }
if ($BarWidth !~ /^[\d]+/ || $BarWidth<1) { $BarWidth=260; }
if ($BarHeight !~ /^[\d]+/ || $BarHeight<1) { $BarHeight=180; }
print HISTORYTMP "FirstTime $FirstTime{$year.$month}\n";
print HISTORYTMP "LastTime $LastTime{$year.$month}\n";
if ($LastUpdate{$year.$month} < int("$nowyear$nowmonth$nowday$nowhour$nowmin$nowsec")) { $LastUpdate{$year.$month}=int("$nowyear$nowmonth$nowday$nowhour$nowmin$nowsec"); }
- print HISTORYTMP "LastUpdate $LastUpdate{$year.$month} $NbOfLinesRead $NbOfNewLinesProcessed $NbOfLinesCorrupted\n";
+ print HISTORYTMP "LastUpdate $LastUpdate{$year.$month} $NbOfLinesRead $NbOfNewLinesProcessed $NbOfLinesCorrupted $NbOfLinesEmpty $NbOfLinesDropped\n";
print HISTORYTMP "TotalVisits $MonthVisits{$year.$month}\n";
# When
if ($option) { $StartSeconds=0; } # Reset counter
my ($newseconds, $newmicroseconds)=(0,0);
my $usedTimeHires=0;
-# ($newseconds, $newmicroseconds) = gettimeofday; $usedTimeHires=1; # Uncomment to use Time::HiRes function (provide milliseconds)
+ ($newseconds, $newmicroseconds) = gettimeofday; $usedTimeHires=1; # Uncomment to use Time::HiRes function (provide milliseconds)
if ((! $usedTimeHires) || ($newseconds eq "gettimeofday")) { $newseconds=time(); }
if (! $StartSeconds) { $StartSeconds=$newseconds; $StartMicroseconds=$newmicroseconds; }
my $nbms=$newseconds*1000+int($newmicroseconds/1000)-$StartSeconds*1000-int($StartMicroseconds/1000);
if ($QueryString =~ /output/i) { $UpdateStats=0; $HTMLOutput=1; } # Report and no update if an output is required
if ($QueryString =~ /update/i) { $UpdateStats=1; } # Except if -update specified
$QueryString=~s/output&//; $QueryString=~s/output$//;
- if ($QueryString =~ /showsteps/i) { $ShowSteps=1; } else { $ShowSteps=0; }
+ if ($QueryString =~ /showsteps/i) { $ShowSteps=1; }
$QueryString=~s/showsteps[^&]*//;
- if ($QueryString =~ /showcorrupted/i) { $ShowCorrupted=1; } else { $ShowCorrupted=0; }
+ if ($QueryString =~ /showcorrupted/i) { $ShowCorrupted=1; }
$QueryString=~s/showcorrupted[^&]*//;
+ if ($QueryString =~ /showdropped/i) { $ShowDropped=1; }
+ $QueryString=~s/showdropped[^&]*//;
+ if ($QueryString =~ /showunknownorigin/i) { $ShowUnknownOrigin=1; }
+ $QueryString=~s/showunknownorigin[^&]*//;
}
if ($QueryString =~ /logfile=([^\s&]+)$/i) { $LogFile=$1; }
if ($QueryString =~ /staticlinks/i) { $StaticLinks=1; }
}
debug("HostAliases is now @HostAliases",2);
# Init SkipFiles array
- if (! @SkipFiles) { $SkipFiles[0]="\.css\$";$SkipFiles[1]="\.js\$";$SkipFiles[2]="\.class\$";$SkipFiles[3]="robots\.txt\$"; }
+ if (! @SkipFiles) { $SkipFiles[0]="robots\.txt\$"; $SkipFiles[1]="favicon\.ico\$"; }
debug("SkipFiles is now @SkipFiles",2);
debug("Start Update process");
#------------------------------------------
&debug("Start of processing log file (monthtoprocess=$monthtoprocess, yeartoprocess=$yeartoprocess)");
my $yearmonth=sprintf("%04i%02i",$yeartoprocess,$monthtoprocess);
- $NbOfLinesRead=0; $NbOfNewLinesProcessed=0; $NbOfLinesCorrupted=0;
+ $NbOfLinesRead=$NbOfLinesEmpty=$NbOfLinesDropped=$NbOfLinesCorrupted=$NbOfNewLinesProcessed=0;
$NowNewLinePhase=0;
# Open log file
while (<LOG>)
{
- if (/^#/) { next; } # Ignore comment lines (ISS writes such comments)
- if (/^!/) { next; } # Ignore comment lines (Webstar writes such comments)
- #if (/^$/) { next; } # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
- chomp $_; s/\r//;
-
$NbOfLinesRead++;
+ if (/^#/ || /^!/) {
+ $NbOfLinesEmpty++; # Ignore comment lines (ISS writes such comments)
+ next; # Ignore comment lines (Webstar writes such comments)
+ }
+ #if (/^$/) { $NbOfLinesEmpty++; next; } # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
+ chomp $_; s/\r//;
+
# Parse line record to get all required fields
/^$PerlParsingFormat/;
my @field=();
# Check filters
#----------------------------------------------------------------------
my $protocol=0;
- if ($field[$pos_method] eq 'GET' || $field[$pos_method] eq 'POST' || $field[$pos_method] =~ /OK/) {
- # HTTP request. Keep only GET, POST, *OK* with Webstar but not HEAD, OPTIONS
+ if ($field[$pos_method] eq 'GET' || $field[$pos_method] eq 'POST' || $field[$pos_method] eq 'HEAD' || $field[$pos_method] =~ /OK/) {
+ # HTTP request. Keep only GET, POST, HEAD, *OK* with Webstar but not OPTIONS
$protocol=1;
}
elsif ($field[$pos_method] =~ /sent/ || $field[$pos_method] =~ /get/) {
# FTP request.
$protocol=2;
}
- if (! $protocol) { next; }
+ if (! $protocol) {
+ $NbOfLinesDropped++;
+ if ($ShowDropped) { print "Dropped record (not qualified method): $_\n"; }
+ next;
+ }
# Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
$field[$pos_date] =~ tr/-\/ \t/::::/;
# Here, field array, datepart array, timeconnexion and dayconnexion are init for log record
&debug(" This is a not already processed record",3);
- # Record is approved. We found a new line
+
+ # We found a new line
#----------------------------------------
+ $LastLine{$yearmonth} = $timeconnexion;
+
+ # TODO. Add as a robot if URL is robots.txt
+
+ # Skip for some client host IP addresses, some URLs, other URLs
+ if ( &SkipFile($field[$pos_url]) || &SkipHost($field[$pos_rc]) || ! &OnlyFile($field[$pos_url]) ) {
+ $NbOfLinesDropped++;
+ if ($ShowDropped) { print "Dropped record (not qualified record): $_\n"; }
+ next;
+ }
+
+ # Record is approved
+ #-------------------
$NbOfNewLinesProcessed++;
if ($ShowSteps && ($NbOfNewLinesProcessed % $NbOfLinesForBenchmark == 0)) {
my $delay=GetDelaySinceStart(0);
print "$NbOfNewLinesProcessed lines processed ($delay ms, ".int(1000*$NbOfNewLinesProcessed/($delay>0?$delay:1))." lines/seconds)\n";
}
- $LastLine{$yearmonth} = $timeconnexion;
-
- # TODO. Add as a robot if URL is robots.txt
-
- if (&SkipHost($field[$pos_rc])) { next; } # Skip with some client host IP addresses
- if (&SkipFile($field[$pos_url])) { next; } # Skip with some URLs
- if (! &OnlyFile($field[$pos_url])) { next; } # Skip with other URLs
-
# Is it in a new month section ?
#-------------------------------
if (((int($dateparts[1]) > $monthtoprocess) && (int($dateparts[2]) >= $yeartoprocess)) || (int($dateparts[2]) > $yeartoprocess)) {
# Robot ?
#-------------------------------------------------------------------------
- if (!$TmpHashRobot{$UserAgent}) { # TmpHashRobot is a temporary hash table to increase speed
- # If made on each record -> -1300 rows/seconds
- my $foundrobot=0;
- # study $UserAgent
- foreach my $bot (@RobotsSearchIDOrder) {
- if ($UserAgent =~ /$bot/) {
- $foundrobot=1;
- $TmpHashRobot{$UserAgent}="$bot"; # Last time, we won't search if robot or not. We know it's is.
- last;
+ if ($LevelForRobotsDetection) {
+ if (!$TmpHashRobot{$UserAgent}) { # TmpHashRobot is a temporary hash table to increase speed
+ # If made on each record -> -1300 rows/seconds
+ my $foundrobot=0;
+ # study $UserAgent
+ foreach my $bot (@RobotsSearchIDOrder) {
+ if ($UserAgent =~ /$bot/) {
+ $foundrobot=1;
+ $TmpHashRobot{$UserAgent}="$bot"; # Last time, we won't search if robot or not. We know it's is.
+ last;
+ }
+ }
+ if (! $foundrobot) { # Last time, we won't search if robot or not. We know it's not.
+ $TmpHashRobot{$UserAgent}="-";
}
}
- if (! $foundrobot) { # Last time, we won't search if robot or not. We know it's not.
- $TmpHashRobot{$UserAgent}="-";
+ # If robot, we stop here
+ if ($TmpHashRobot{$UserAgent} ne "-") {
+ debug("UserAgent $UserAgent contains robot ID '$TmpHashRobot{$UserAgent}'",2);
+ $_robot_h{$TmpHashRobot{$UserAgent}}++; $_robot_l{$TmpHashRobot{$UserAgent}}=$timeconnexion;
+ next;
}
}
- # If robot, we stop here
- if ($TmpHashRobot{$UserAgent} ne "-") {
- debug("UserAgent $UserAgent contains robot ID '$TmpHashRobot{$UserAgent}'",2);
- $_robot_h{$TmpHashRobot{$UserAgent}}++; $_robot_l{$TmpHashRobot{$UserAgent}}=$timeconnexion;
- next;
- }
# Canonize and clean target URL and referrer URL. Possible URL syntax for $field[$pos_url]:
# /mypage.ext?param=x#aaa
$urlwithnoquery=$field[$pos_url];
}
# urlwithnoquery=/mypage.ext
- $field[$pos_url] =~ s/\/$DefaultFile$/\//; # Replace default page name with / only
# Analyze file type and compression
#----------------------------------
my $PageBool=1;
my $extension;
-
# Extension
if ($urlwithnoquery =~ /\.(\w{1,6})$/) {
$extension=$1; $extension =~ tr/A-Z/a-z/;
- # Check if not a page
- foreach my $cursor (@NotPageList) { if ($extension eq $cursor) { $PageBool=0; last; } }
+ if ($NotPageList{$extension}) { $PageBool=0; }
} else {
$extension="Unknown";
}
# Analyze: Date - Hour - Pages - Hits - Kilo
#-------------------------------------------
if ($PageBool) {
+
+ $field[$pos_url] =~ s/\/$DefaultFile$/\//; # Replace default page name with / only
+
# FirstTime and LastTime are First and Last human visits (so changed if access to a page)
if (! $FirstTime{$yearmonth}) { $FirstTime{$yearmonth}=$timeconnexion; }
$LastTime{$yearmonth} = $timeconnexion;
$_time_p[int($dateparts[3])]++; #Count accesses for hour (page)
$_url_p{$field[$pos_url]}++; #Count accesses for page (page)
$_url_k{$field[$pos_url]}+=$field[$pos_size];
- }
+ }
$_time_h[int($dateparts[3])]++; $MonthHits{$yearmonth}++; $DayHits{$dayconnexion}++; #Count accesses for hour (hit)
$_time_k[int($dateparts[3])]+=$field[$pos_size]; $MonthBytes{$yearmonth}+=$field[$pos_size]; $DayBytes{$dayconnexion}+=$field[$pos_size]; #Count accesses for hour (kb)
if ($HostIsIp && ((! $TmpHashDNSLookup{$Host}) || ($TmpHashDNSLookup{$Host} eq "ip"))) {
# Here $Host = IP address not resolved
if ($PageBool) {
- if ($timeconnexion > (($_hostmachine_l{$Host}||0)+$VisitTimeOut)) {
- $MonthVisits{$yearmonth}++;
- $DayVisits{$dayconnexion}++;
- if (! $_hostmachine_l{$Host}) { $MonthUnique{$yearmonth}++; }
- $_url_e{$field[$pos_url]}++; # Increase 'entry' page
- }
- $_hostmachine_p{$Host}++;
- $_hostmachine_l{$Host}=$timeconnexion;
- $_domener_p{"ip"}++;
+ if ($timeconnexion > (($_hostmachine_l{$Host}||0)+$VisitTimeOut)) {
+ $MonthVisits{$yearmonth}++;
+ $DayVisits{$dayconnexion}++;
+ if (! $_hostmachine_l{$Host}) { $MonthUnique{$yearmonth}++; }
+ $_url_e{$field[$pos_url]}++; # Increase 'entry' page
+ }
+ $_hostmachine_p{$Host}++;
+ $_hostmachine_l{$Host}=$timeconnexion;
+ $_domener_p{"ip"}++;
}
if (! $_hostmachine_h{$Host}) { $MonthHostsUnknown{$yearmonth}++; }
$_hostmachine_h{$Host}++;
$_domener_k{"ip"}+=$field[$pos_size];
}
else {
- # Here $TmpHashDNSLookup{$Host} is $Host resolved or undefined if $Host already resolved
+ # Here $TmpHashDNSLookup{$Host} is $Host resolved or undefined if $Host was already a host name
$_ = ($TmpHashDNSLookup{$Host}?$TmpHashDNSLookup{$Host}:$Host);
tr/A-Z/a-z/;
- if (!$FullHostName) { s/^[\w\-]+\.//; };
+ #if (!$FullHostName) { s/^[\w\-]+\.//; };
if ($PageBool) {
if ($timeconnexion > (($_hostmachine_l{$_}||0)+$VisitTimeOut)) {
# This is a new visit
$_hostmachine_k{$_}+=$field[$pos_size];
# Count top-level domain
- if (/\.([\w]+)$/) { $_=$1; }
+ if (/\.(\w+)$/) { $_=$1; }
if ($DomainsHashIDLib{$_}) {
if ($PageBool) { $_domener_p{$_}++; }
$_domener_h{$_}++;
if ($UserAgent) { # Made on each record -> -100 rows/seconds
+ if ($LevelForBrowsersDetection) {
+
# Analyze: Browser
#-----------------
my $found=0;
- if (!$TmpHashBrowser{$UserAgent}) {
+ if (! $TmpHashBrowser{$UserAgent}) {
# IE ? (For higher speed, we start whith IE, the most often used. This avoid other tests if found)
if (($UserAgent =~ /msie/) && ($UserAgent !~ /webtv/) && ($UserAgent !~ /omniweb/) && ($UserAgent !~ /opera/)) {
$_browser_h{"msie"}++;
if (!$found) {
$_browser_h{"Unknown"}++;
$_unknownrefererbrowser_l{$field[$pos_agent]}=$timeconnexion;
-# $TmpHashBrowser{$UserAgent}="Unknown";
+ $TmpHashBrowser{$UserAgent}="Unknown";
}
}
else {
if (!$found) { $_browser_h{$TmpHashBrowser{$UserAgent}}++; }
}
+ }
+
+ if ($LevelForOSDetection) {
+
# Analyze: OS
#------------
- if (!$TmpHashOS{$UserAgent}) {
+ if (! $TmpHashOS{$UserAgent}) {
my $found=0;
# in OSHashID list ?
foreach my $key (@OSSearchIDOrder) { # Searchin ID in order of OSSearchIDOrder
if (!$found) {
$_os_h{"Unknown"}++;
$_unknownreferer_l{$field[$pos_agent]}=$timeconnexion;
-# $TmpHashOS{$UserAgent}="Unknown";
+ $TmpHashOS{$UserAgent}="Unknown";
}
}
else {
$_os_h{$TmpHashOS{$UserAgent}}++;
}
+
+ }
+
}
else {
$_browser_h{"Unknown"}++;
# Analyze: Referer
#-----------------
my $found=0;
- if ($field[$pos_referer]) {
+ if ($field[$pos_referer] && $LevelForRefererAnalyze) {
# Direct ?
- if ($field[$pos_referer] eq "-") {
+ if ($field[$pos_referer] eq "-" || $field[$pos_referer] eq "bookmarks") { # "bookmarks" is sent by Netscape
if ($PageBool) { $_from_p[0]++; }
$_from_h[0]++;
$found=1;
}
else {
- $field[$pos_referer] =~ /^(\w+):\/\/([^\/]*)\//;
+ $field[$pos_referer] =~ /^(\w+):\/\/([^\/]+)/;
my $refererprot=$1;
my $refererserver=$2;
}
if (! $found) {
# Extern (This hit came from an external web site).
+
+ if ($LevelForSearchEnginesDetection) {
+
# If made on each record -> -1700 rows/seconds (should be made on 10% of records only)
foreach my $key (keys %SearchEnginesHashIDLib) {
# This hit came from the search engine $key
debug("Server $refererserver is added to TmpHashRefererServer with value '$key'",2);
$TmpHashRefererServer{$refererserver}="$key";
$found=1;
+ last;
}
}
+
+ }
+
}
}
# Intern (This hit came from another page of the site)
if ($PageBool) { $_from_p[4]++; }
$_from_h[4]++;
+ $found=1;
}
else {
# This hit came from the search engine
# Origin not found
if (!$found) {
+ if ($ShowUnknownOrigin) { print "Unknown origin: $field[$pos_referer]\n"; }
if ($PageBool) { $_from_p[1]++; }
$_from_h[1]++;
}
}
else {
print "Lines in file: $NbOfLinesRead\n";
- print "Found $NbOfNewLinesProcessed new records,\n";
- print "Found $NbOfLinesCorrupted corrupted records.\n";
+ print "Found $NbOfLinesEmpty empty records,\n";
+ print "Found $NbOfLinesDropped dropped records,\n";
+ print "Found $NbOfLinesCorrupted corrupted records,\n";
+ print "Found $NbOfNewLinesProcessed new records.\n";
}
0; # Do not remove this line