From: Frederic Marchal Date: Wed, 26 Dec 2012 16:23:54 +0000 (+0100) Subject: Explain why some input log lines are ignored X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7c8c06c59c14465e92c6b619620b9a85c0d9f9f0;p=thirdparty%2Fsarg.git Explain why some input log lines are ignored When verbose mode is enabled, a listing explaining how many lines have been excluded is displayed. Every reason to ignore a line is listed. It should make it easier to figure out why the report is not generated. --- diff --git a/readlog.c b/readlog.c index 86a6521..b91b239 100644 --- a/readlog.c +++ b/readlog.c @@ -39,6 +39,56 @@ struct userfilestruct FILE *file; }; +enum ExcludeReasonEnum +{ + //! User name too long. + ER_UserNameTooLong, + //! Squid logged an incomplete query received from the client. + ER_IncompleteQuery, + //! Log file turned over. + ER_LogfileTurnedOver, + //! Line begins with a space. + ER_BeginWithSpace, + //! Excluded by exclude_string from sarg.conf. + ER_ExcludeString, + //! Unknown input log file format. + ER_UnknownFormat, + //! Line to be ignored from the input log file. + ER_FormatData, + //! Entry not within the requested date range. + ER_OutOfDateRange, + //! Ignored week day. + ER_OutOfWDayRange, + //! Ignored hour. + ER_OutOfHourRange, + //! User is not in the include_users list. + ER_User, + //! HTTP code excluded by exclude_code file. + ER_HttpCode, + //! Invalid character found in user name. + ER_InvalidUserChar, + //! No URL in entry. + ER_NoUrl, + //! Not the IP address requested with -a. + ER_UntrackedIpAddr, + //! URL excluded by -c or exclude_hosts. + ER_Url, + //! Entry time outside of requested hour range. + ER_OutOfTimeRange, + //! Not the URL requested by -s. + ER_UntrackedUrl, + //! No user in entry. + ER_NoUser, + //! Not the user requested by -u. + ER_UntrackedUser, + //! System user. + ER_SysUser, + //! 
User ignored by exclude_users + ER_IgnoredUser, + + ER_Last //!< last entry of the list +}; + numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; @@ -76,6 +126,8 @@ static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)]; //! The minimum date found in the input logs. static int mindate=0; static int maxdate=0; +//! Count the number of excluded records. +static unsigned long int excluded_count[ER_Last]; /*! Read a single log file. @@ -211,9 +263,18 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) what format they apply. They date back to pre 2.4 versions. */ //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line - if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query - if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog - if(linebuf[0] == ' ') continue; + if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query + excluded_count[ER_IncompleteQuery]++; + continue; + } + if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog + excluded_count[ER_LogfileTurnedOver]++; + continue; + } + if(linebuf[0] == ' ') { + excluded_count[ER_BeginWithSpace]++; + continue; + } // exclude_string if(ExcludeString[0] != '\0') { @@ -231,7 +292,10 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) } if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL ) exstring=true; - if(exstring) continue; + if(exstring) { + excluded_count[ER_ExcludeString]++; + continue; + } } totregsl++; @@ -265,6 +329,7 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) exit(EXIT_FAILURE); } debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf); + 
excluded_count[ER_UnknownFormat]++; continue; } current_format=LogFormats[x]; @@ -276,6 +341,7 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) successive_errors=0; } if (log_entry_status==RLRC_Ignore) { + excluded_count[ER_FormatData]++; continue; } if (current_format_idx<0 || current_format==NULL) { @@ -321,19 +387,27 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil); if(Filter->DateRange[0] != '\0'){ - if(idata < dfrom || idata > duntil) continue; + if(idata < dfrom || idata > duntil) { + excluded_count[ER_OutOfDateRange]++; + continue; + } } // Record only hours usage which is required - if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) + if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) { + excluded_count[ER_OutOfWDayRange]++; continue; + } - if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) + if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) { + excluded_count[ER_OutOfHourRange]++; continue; + } if(strlen(log_entry.User) > MAX_USER_LEN) { if (debugm) printf(_("User ID too long: %s\n"),log_entry.User); + excluded_count[ER_UserNameTooLong]++; totregsx++; continue; } @@ -341,18 +415,23 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) // include_users if(IncludeUsers[0] != '\0') { snprintf(val1,sizeof(val1),":%s:",log_entry.User); - if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) + if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) { + excluded_count[ER_User]++; continue; + } } if(vercode(log_entry.HttpCode)) { if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode); + excluded_count[ER_HttpCode]++; totregsx++; continue; } - 
if(testvaliduserchar(log_entry.User)) + if(testvaliduserchar(log_entry.User)) { + excluded_count[ER_InvalidUserChar]++; continue; + } // replace any tab by a single space for (str=log_entry.Url ; *str ; str++) @@ -373,14 +452,21 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) download_flag=false; url=process_url(log_entry.Url,LongUrl); - if (!url || url[0] == '\0') continue; + if (!url || url[0] == '\0') { + excluded_count[ER_NoUrl]++; + continue; + } if(addr[0] != '\0'){ - if(strcmp(addr,log_entry.Ip)!=0) continue; + if(strcmp(addr,log_entry.Ip)!=0) { + excluded_count[ER_UntrackedIpAddr]++; + continue; + } } if(Filter->HostFilter) { if(!vhexclude(url)) { if (debugm) printf(_("Excluded site: %s\n"),url); + excluded_count[ER_Url]++; totregsx++; continue; } @@ -388,11 +474,17 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) if(Filter->StartTime >= 0 && Filter->EndTime >= 0) { hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min; - if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue; + if(hmr < Filter->StartTime || hmr > Filter->EndTime) { + excluded_count[ER_OutOfTimeRange]++; + continue; + } } if(site[0] != '\0'){ - if(strstr(url,site)==0) continue; + if(strstr(url,site)==0) { + excluded_count[ER_UntrackedUrl]++; + continue; + } } if(UserIp) { @@ -405,8 +497,10 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) log_entry.User=log_entry.Ip; id_is_ip=true; } - if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) + if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) { + excluded_count[ER_NoUser]++; continue; + } if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) log_entry.User="everybody"; } else { @@ -420,26 +514,34 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) } if(us[0] != '\0'){ - if(strcmp(log_entry.User,us)!=0) continue; + if(strcmp(log_entry.User,us)!=0) { + excluded_count[ER_UntrackedUser]++; + continue; + } } 
if(Filter->SysUsers) { snprintf(wuser,sizeof(wuser),":%s:",log_entry.User); - if(strstr(userfile, wuser) == 0) + if(strstr(userfile, wuser) == 0) { + excluded_count[ER_SysUser]++; continue; + } } if(Filter->UserFilter) { if(!vuexclude(log_entry.User)) { if (debugm) printf(_("Excluded user: %s\n"),log_entry.User); + excluded_count[ER_IgnoredUser]++; totregsx++; continue; } } if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || - log_entry.User[0]==' ' || log_entry.User[0]==':'))) + log_entry.User[0]==' ' || log_entry.User[0]==':'))) { + excluded_count[ER_NoUser]++; continue; + } if (log_entry.DataSize<0) log_entry.DataSize=0; @@ -573,6 +675,19 @@ static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq) } } +/*! + * Display a line with the excluded entries count. + * + * \param Explain A translated string explaining the excluded count. + * \param Reason The reason number. + */ +static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason) +{ + if (excluded_count[Reason]>0) { + debuga(" %s: %lu\n",Explain,excluded_count[Reason]); + } +} + /*! Read the log files. 
@@ -591,6 +706,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) const char *file; for (x=0 ; x=0 && excluded_count[x]>0 ; x--); + if (x>=0) { + debuga(_("Reasons for excluded entries:\n")); + DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong); + DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery); + DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver); + DisplayExcludeCount(_("Line begins with a space"),ER_BeginWithSpace); + DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString); + DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat); + DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData); + DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange); + DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange); + DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange); + DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User); + DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode); + DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar); + DisplayExcludeCount(_("No URL in entry"),ER_NoUrl); + DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr); + DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url); + DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange); + DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl); + DisplayExcludeCount(_("No user in entry"),ER_NoUser); + DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser); + DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser); + DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser); + } + for (x=0 ; x0) 
{ /* TRANSLATORS: It displays the number of lines found in the input log files