FILE *file;
};
+//! The reasons for which an entry of the input log is excluded.
+//! Each value is used as an index into the excluded_count array, which is
+//! dimensioned with ER_Last. ER_Last must therefore remain the last item.
+enum ExcludeReasonEnum
+{
+ //! User name too long.
+ ER_UserNameTooLong,
+ //! Squid logged an incomplete query received from the client.
+ ER_IncompleteQuery,
+ //! Log file turned over.
+ ER_LogfileTurnedOver,
+ //! Line begins with a space.
+ ER_BeginWithSpace,
+ //! Excluded by exclude_string from sarg.conf.
+ ER_ExcludeString,
+ //! Unknown input log file format.
+ ER_UnknownFormat,
+ //! Line to be ignored from the input log file.
+ ER_FormatData,
+ //! Entry not within the requested date range.
+ ER_OutOfDateRange,
+ //! Ignored week day.
+ ER_OutOfWDayRange,
+ //! Ignored hour.
+ ER_OutOfHourRange,
+ //! User is not in the include_users list.
+ ER_User,
+ //! HTTP code excluded by exclude_code file.
+ ER_HttpCode,
+ //! Invalid character found in user name.
+ ER_InvalidUserChar,
+ //! No URL in entry.
+ ER_NoUrl,
+ //! Not the IP address requested with -a.
+ ER_UntrackedIpAddr,
+ //! URL excluded by -c or exclude_hosts.
+ ER_Url,
+ //! Entry time outside of requested hour range.
+ ER_OutOfTimeRange,
+ //! Not the URL requested by -s.
+ ER_UntrackedUrl,
+ //! No user in entry.
+ ER_NoUser,
+ //! Not the user requested by -u.
+ ER_UntrackedUser,
+ //! System user.
+ ER_SysUser,
+ //! User ignored by exclude_users.
+ ER_IgnoredUser,
+
+ ER_Last //!< Number of reasons in the list. Not a reason by itself.
+};
+
numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
//! The minimum date found in the input logs.
static int mindate=0;
static int maxdate=0;
+//! Count the number of excluded records. There is one counter per reason and
+//! the array is indexed by the values of ::ExcludeReasonEnum.
+static unsigned long int excluded_count[ER_Last];
/*!
Read a single log file.
what format they apply. They date back to pre 2.4 versions.
*/
//if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
- if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query
- if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog
- if(linebuf[0] == ' ') continue;
+ if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
+ excluded_count[ER_IncompleteQuery]++;
+ continue;
+ }
+ if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
+ excluded_count[ER_LogfileTurnedOver]++;
+ continue;
+ }
+ if(linebuf[0] == ' ') {
+ excluded_count[ER_BeginWithSpace]++;
+ continue;
+ }
// exclude_string
if(ExcludeString[0] != '\0') {
}
if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
exstring=true;
- if(exstring) continue;
+ if(exstring) {
+ excluded_count[ER_ExcludeString]++;
+ continue;
+ }
}
totregsl++;
exit(EXIT_FAILURE);
}
debuga(_("The following line read from %s could not be parsed and is ignored\n%s\n"),arq,linebuf);
+ excluded_count[ER_UnknownFormat]++;
continue;
}
current_format=LogFormats[x];
successive_errors=0;
}
if (log_entry_status==RLRC_Ignore) {
+ excluded_count[ER_FormatData]++;
continue;
}
if (current_format_idx<0 || current_format==NULL) {
printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
if(Filter->DateRange[0] != '\0'){
- if(idata < dfrom || idata > duntil) continue;
+ if(idata < dfrom || idata > duntil) {
+ excluded_count[ER_OutOfDateRange]++;
+ continue;
+ }
}
// Record only hours usage which is required
- if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
+ if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
+ excluded_count[ER_OutOfWDayRange]++;
continue;
+ }
- if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
+ if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
+ excluded_count[ER_OutOfHourRange]++;
continue;
+ }
if(strlen(log_entry.User) > MAX_USER_LEN) {
if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
+ excluded_count[ER_UserNameTooLong]++;
totregsx++;
continue;
}
// include_users
if(IncludeUsers[0] != '\0') {
snprintf(val1,sizeof(val1),":%s:",log_entry.User);
- if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
+ if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) {
+ excluded_count[ER_User]++;
continue;
+ }
}
if(vercode(log_entry.HttpCode)) {
if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
+ excluded_count[ER_HttpCode]++;
totregsx++;
continue;
}
- if(testvaliduserchar(log_entry.User))
+ if(testvaliduserchar(log_entry.User)) {
+ excluded_count[ER_InvalidUserChar]++;
continue;
+ }
// replace any tab by a single space
for (str=log_entry.Url ; *str ; str++)
download_flag=false;
url=process_url(log_entry.Url,LongUrl);
- if (!url || url[0] == '\0') continue;
+ if (!url || url[0] == '\0') {
+ excluded_count[ER_NoUrl]++;
+ continue;
+ }
if(addr[0] != '\0'){
- if(strcmp(addr,log_entry.Ip)!=0) continue;
+ if(strcmp(addr,log_entry.Ip)!=0) {
+ excluded_count[ER_UntrackedIpAddr]++;
+ continue;
+ }
}
if(Filter->HostFilter) {
if(!vhexclude(url)) {
if (debugm) printf(_("Excluded site: %s\n"),url);
+ excluded_count[ER_Url]++;
totregsx++;
continue;
}
if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
- if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
+ if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
+ excluded_count[ER_OutOfTimeRange]++;
+ continue;
+ }
}
if(site[0] != '\0'){
- if(strstr(url,site)==0) continue;
+ if(strstr(url,site)==0) {
+ excluded_count[ER_UntrackedUrl]++;
+ continue;
+ }
}
if(UserIp) {
log_entry.User=log_entry.Ip;
id_is_ip=true;
}
- if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
+ if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) {
+ excluded_count[ER_NoUser]++;
continue;
+ }
if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
log_entry.User="everybody";
} else {
}
if(us[0] != '\0'){
- if(strcmp(log_entry.User,us)!=0) continue;
+ if(strcmp(log_entry.User,us)!=0) {
+ excluded_count[ER_UntrackedUser]++;
+ continue;
+ }
}
if(Filter->SysUsers) {
snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
- if(strstr(userfile, wuser) == 0)
+ if(strstr(userfile, wuser) == 0) {
+ excluded_count[ER_SysUser]++;
continue;
+ }
}
if(Filter->UserFilter) {
if(!vuexclude(log_entry.User)) {
if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
+ excluded_count[ER_IgnoredUser]++;
totregsx++;
continue;
}
}
if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
- log_entry.User[0]==' ' || log_entry.User[0]==':')))
+ log_entry.User[0]==' ' || log_entry.User[0]==':'))) {
+ excluded_count[ER_NoUser]++;
continue;
+ }
if (log_entry.DataSize<0) log_entry.DataSize=0;
}
}
+/*!
+ * Report how many entries were excluded for one given reason.
+ *
+ * Nothing is displayed if no entry was excluded for that reason.
+ *
+ * \param Explain A translated string explaining the excluded count.
+ * \param Reason The reason number. It selects the counter to display.
+ */
+static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
+{
+ const unsigned long int count=excluded_count[Reason];
+
+ if (count==0) return;
+ debuga(" %s: %lu\n",Explain,count);
+}
+
/*!
Read the log files.
const char *file;
for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
+ for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
first_user_file=NULL;
if (!dataonly) {
debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
+ // Look for the first reason with a non-zero count. The list of the reasons
+ // is displayed only if at least one entry was excluded. The forward scan
+ // uses the same bound as the loop clearing the counters and doesn't depend
+ // on the signedness of x.
+ for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) && excluded_count[x]==0 ; x++);
+ if (x<sizeof(excluded_count)/sizeof(*excluded_count)) {
+ debuga(_("Reasons for excluded entries:\n"));
+ // Each call displays nothing if its counter is zero.
+ DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
+ DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
+ DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
+ DisplayExcludeCount(_("Line begins with a space"),ER_BeginWithSpace);
+ DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
+ DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
+ DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
+ DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
+ DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
+ DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
+ DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
+ DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
+ DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
+ DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
+ DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
+ DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
+ DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
+ DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
+ DisplayExcludeCount(_("No user in entry"),ER_NoUser);
+ DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
+ DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
+ DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
+ }
+
for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
if (format_count[x]>0) {
/* TRANSLATORS: It displays the number of lines found in the input log files