git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Explain why some input log lines are ignored
author Frederic Marchal <fmarchal@users.sourceforge.net>
Wed, 26 Dec 2012 16:23:54 +0000 (17:23 +0100)
committer Frederic Marchal <fmarchal@users.sourceforge.net>
Wed, 26 Dec 2012 16:23:54 +0000 (17:23 +0100)
When verbose mode is enabled, a listing explaining how many lines have
been excluded is displayed. Every reason to ignore a line is listed. It
should make it easier to figure out why the report is not generated.

readlog.c

index 86a652174f7fa5e8326e94d9c8e634ce7fb91f79..b91b23928963ffbda5c5768d3e8362722355941f 100644 (file)
--- a/readlog.c
+++ b/readlog.c
@@ -39,6 +39,56 @@ struct userfilestruct
        FILE *file;
 };
 
+enum ExcludeReasonEnum // One value per reason an input log line is dropped; each value indexes excluded_count[].
+{
+       //! User name longer than MAX_USER_LEN.
+       ER_UserNameTooLong,
+       //! Squid logged an incomplete query received from the client (line contains "HTTP/0.0").
+       ER_IncompleteQuery,
+       //! Log file turned over (line reported by newsyslog).
+       ER_LogfileTurnedOver,
+       //! Line begins with a space.
+       ER_BeginWithSpace,
+       //! Excluded by exclude_string from sarg.conf.
+       ER_ExcludeString,
+       //! Unknown input log file format.
+       ER_UnknownFormat,
+       //! Line the input log format reader reported as to be ignored (RLRC_Ignore).
+       ER_FormatData,
+       //! Entry not within the requested date range (-d).
+       ER_OutOfDateRange,
+       //! Ignored week day ("weekdays" parameter in sarg.conf).
+       ER_OutOfWDayRange,
+       //! Ignored hour ("hours" parameter in sarg.conf).
+       ER_OutOfHourRange,
+       //! User is not in the include_users list.
+       ER_User,
+       //! HTTP code excluded by exclude_code file.
+       ER_HttpCode,
+       //! Invalid character found in user name.
+       ER_InvalidUserChar,
+       //! No URL in entry.
+       ER_NoUrl,
+       //! Not the IP address requested with -a.
+       ER_UntrackedIpAddr,
+       //! URL excluded by -c or exclude_hosts.
+       ER_Url,
+       //! Entry time outside of requested hour range (-t).
+       ER_OutOfTimeRange,
+       //! Not the URL requested by -s.
+       ER_UntrackedUrl,
+       //! No user in entry.
+       ER_NoUser,
+       //! Not the user requested by -u.
+       ER_UntrackedUser,
+       //! System user as defined by "password" in sarg.conf.
+       ER_SysUser,
+       //! User ignored by exclude_users.
+       ER_IgnoredUser,
+
+       ER_Last //!< Number of reasons; sizes excluded_count[], so it must remain the last entry.
+};
+
 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
 
@@ -76,6 +126,8 @@ static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
 //! The minimum date found in the input logs.
 static int mindate=0;
 static int maxdate=0;
+//! Count the number of excluded records.
+static unsigned long int excluded_count[ER_Last];
 
 /*!
 Read a single log file.
@@ -211,9 +263,18 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                what format they apply. They date back to pre 2.4 versions.
                */
                //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
-               if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query
-               if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog
-               if(linebuf[0] == ' ') continue;
+               if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
+                       excluded_count[ER_IncompleteQuery]++;
+                       continue;
+               }
+               if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
+                       excluded_count[ER_LogfileTurnedOver]++;
+                       continue;
+               }
+               if(linebuf[0] == ' ') {
+                       excluded_count[ER_BeginWithSpace]++;
+                       continue;
+               }
 
                // exclude_string
                if(ExcludeString[0] != '\0') {
@@ -231,7 +292,10 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                        }
                        if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
                                exstring=true;
-                       if(exstring) continue;
+                       if(exstring) {
+                               excluded_count[ER_ExcludeString]++;
+                               continue;
+                       }
                }
 
                totregsl++;
@@ -265,6 +329,7 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                                        exit(EXIT_FAILURE);
                                }
                                debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf);
+                               excluded_count[ER_UnknownFormat]++;
                                continue;
                        }
                        current_format=LogFormats[x];
@@ -276,6 +341,7 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                        successive_errors=0;
                }
                if (log_entry_status==RLRC_Ignore) {
+                       excluded_count[ER_FormatData]++;
                        continue;
                }
                if (current_format_idx<0 || current_format==NULL) {
@@ -321,19 +387,27 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                        printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
 
                if(Filter->DateRange[0] != '\0'){
-                       if(idata < dfrom || idata > duntil) continue;
+                       if(idata < dfrom || idata > duntil) {
+                               excluded_count[ER_OutOfDateRange]++;
+                               continue;
+                       }
                }
 
                // Record only hours usage which is required
-               if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
+               if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
+                       excluded_count[ER_OutOfWDayRange]++;
                        continue;
+               }
 
-               if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
+               if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
+                       excluded_count[ER_OutOfHourRange]++;
                        continue;
+               }
 
 
                if(strlen(log_entry.User) > MAX_USER_LEN) {
                        if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
+                       excluded_count[ER_UserNameTooLong]++;
                        totregsx++;
                        continue;
                }
@@ -341,18 +415,23 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                // include_users
                if(IncludeUsers[0] != '\0') {
                        snprintf(val1,sizeof(val1),":%s:",log_entry.User);
-                       if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
+                       if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
+                               excluded_count[ER_User]++;
                                continue;
+                       }
                }
 
                if(vercode(log_entry.HttpCode)) {
                        if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
+                       excluded_count[ER_HttpCode]++;
                        totregsx++;
                        continue;
                }
 
-               if(testvaliduserchar(log_entry.User))
+               if(testvaliduserchar(log_entry.User)) {
+                       excluded_count[ER_InvalidUserChar]++;
                        continue;
+               }
 
                // replace any tab by a single space
                for (str=log_entry.Url ; *str ; str++)
@@ -373,14 +452,21 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                        download_flag=false;
 
                url=process_url(log_entry.Url,LongUrl);
-               if (!url || url[0] == '\0') continue;
+               if (!url || url[0] == '\0') {
+                       excluded_count[ER_NoUrl]++;
+                       continue;
+               }
 
                if(addr[0] != '\0'){
-                       if(strcmp(addr,log_entry.Ip)!=0) continue;
+                       if(strcmp(addr,log_entry.Ip)!=0) {
+                               excluded_count[ER_UntrackedIpAddr]++;
+                               continue;
+                       }
                }
                if(Filter->HostFilter) {
                        if(!vhexclude(url)) {
                                if (debugm) printf(_("Excluded site: %s\n"),url);
+                               excluded_count[ER_Url]++;
                                totregsx++;
                                continue;
                        }
@@ -388,11 +474,17 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
 
                if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
                        hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
-                       if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
+                       if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
+                               excluded_count[ER_OutOfTimeRange]++;
+                               continue;
+                       }
                }
 
                if(site[0] != '\0'){
-                       if(strstr(url,site)==0) continue;
+                       if(strstr(url,site)==0) {
+                               excluded_count[ER_UntrackedUrl]++;
+                               continue;
+                       }
                }
 
                if(UserIp) {
@@ -405,8 +497,10 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                                        log_entry.User=log_entry.Ip;
                                        id_is_ip=true;
                                }
-                               if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
+                               if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
+                                       excluded_count[ER_NoUser]++;
                                        continue;
+                               }
                                if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
                                        log_entry.User="everybody";
                        } else {
@@ -420,26 +514,34 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
                }
 
                if(us[0] != '\0'){
-                       if(strcmp(log_entry.User,us)!=0) continue;
+                       if(strcmp(log_entry.User,us)!=0) {
+                               excluded_count[ER_UntrackedUser]++;
+                               continue;
+                       }
                }
 
                if(Filter->SysUsers) {
                        snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
-                       if(strstr(userfile, wuser) == 0)
+                       if(strstr(userfile, wuser) == 0) {
+                               excluded_count[ER_SysUser]++;
                                continue;
+                       }
                }
 
                if(Filter->UserFilter) {
                        if(!vuexclude(log_entry.User)) {
                                if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
+                               excluded_count[ER_IgnoredUser]++;
                                totregsx++;
                                continue;
                        }
                }
 
                if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
-                               log_entry.User[0]==' ' || log_entry.User[0]==':')))
+                               log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
+                       excluded_count[ER_NoUser]++;
                        continue;
+               }
 
                if (log_entry.DataSize<0) log_entry.DataSize=0;
 
@@ -573,6 +675,19 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
        }
 }
 
+/*!
+ * Display a line with the excluded entries count, but only if that count is not zero.
+ *
+ * \param Explain A translated string explaining the excluded count.
+ * \param Reason The reason number, used as the index into excluded_count.
+ */
+static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
+{
+       if (excluded_count[Reason]>0) {
+               debuga("   %s: %lu\n",Explain,excluded_count[Reason]);
+       }
+}
+
 /*!
 Read the log files.
 
@@ -591,6 +706,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
        const char *file;
 
        for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
+       for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
        first_user_file=NULL;
 
        if (!dataonly) {
@@ -662,6 +778,33 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
 
                debuga(_("   Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
 
+               for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
+               if (x>=0) {
+                       debuga(_("Reasons for excluded entries:\n"));
+                       DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
+                       DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
+                       DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
+                       DisplayExcludeCount(_("Line begins with a space"),ER_BeginWithSpace);
+                       DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
+                       DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
+                       DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
+                       DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
+                       DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
+                       DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
+                       DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
+                       DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
+                       DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
+                       DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
+                       DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
+                       DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
+                       DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
+                       DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
+                       DisplayExcludeCount(_("No user in entry"),ER_NoUser);
+                       DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
+                       DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
+                       DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
+               }
+
                for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
                        if (format_count[x]>0) {
                                /* TRANSLATORS: It displays the number of lines found in the input log files