/*
* SARG Squid Analysis Report Generator http://sarg.sourceforge.net
- * 1998, 2012
+ * 1998, 2015
*
* SARG donations:
* please look at http://sarg.sourceforge.net/donations.php
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"
+#include "include/filelist.h"
#define REPORT_EVERY_X_LINES 5000
#define MAX_OPEN_USER_FILES 10
FILE *file;
};
+enum ExcludeReasonEnum // Reasons for dropping a log entry; each value is an index into excluded_count[].
+{
+ //! User name too long.
+ ER_UserNameTooLong,
+ //! Squid logged an incomplete query received from the client.
+ ER_IncompleteQuery,
+ //! Log file turned over.
+ ER_LogfileTurnedOver,
+ //! Excluded by exclude_string from sarg.conf.
+ ER_ExcludeString,
+ //! Unknown input log file format.
+ ER_UnknownFormat,
+ //! Line to be ignored from the input log file.
+ ER_FormatData,
+ //! Entry not within the requested date range.
+ ER_OutOfDateRange,
+ //! Ignored week day.
+ ER_OutOfWDayRange,
+ //! Ignored hour.
+ ER_OutOfHourRange,
+ //! User is not in the include_users list.
+ ER_User,
+ //! HTTP code excluded by exclude_code file.
+ ER_HttpCode,
+ //! Invalid character found in user name.
+ ER_InvalidUserChar,
+ //! No URL in entry.
+ ER_NoUrl,
+ //! Not the IP address requested with -a.
+ ER_UntrackedIpAddr,
+ //! URL excluded by -c or exclude_hosts.
+ ER_Url,
+ //! Entry time outside of requested hour range.
+ ER_OutOfTimeRange,
+ //! Not the URL requested by -s.
+ ER_UntrackedUrl,
+ //! No user in entry.
+ ER_NoUser,
+ //! Not the user requested by -u.
+ ER_UntrackedUser,
+ //! System user.
+ ER_SysUser,
+ //! User ignored by exclude_users.
+ ER_IgnoredUser,
+
+ ER_Last //!< Last entry of the list; also the number of slots in excluded_count[].
+};
+
numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
+//! Domain suffix to strip from the user name.
+char StripUserSuffix[MAX_USER_LEN]="";
+//! Length of the suffix to strip from the user name.
+int StripSuffixLen=0;
-extern char *userfile;
+extern FileListObject AccessLog;
extern const struct ReadLogProcessStruct ReadSquidLog;
extern const struct ReadLogProcessStruct ReadCommonLog;
&ReadExtLog
};
+//! The path to the sarg log file.
+static char SargLogFile[4096]="";
+//! Handle to the sarg log file. NULL if not created.
+static FILE *fp_log=NULL;
+//! The number of records read from the input logs.
+static long int totregsl=0;
+//! The number of records kept.
+static long int totregsg=0;
+//! The number of records excluded.
+static long int totregsx=0;
+//! The head of the linked list of user files.
+static struct userfilestruct *first_user_file=NULL;
+//! Count the number of occurrences of each input log format (one slot per entry of LogFormats).
+static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
+//! The minimum date found in the input logs.
+static int mindate=0;
+static int maxdate=0; //!< Presumably the maximum date found in the input logs (counterpart of mindate) - TODO confirm, it is undocumented.
+//! Count the number of excluded records, one counter per ExcludeReasonEnum value.
+static unsigned long int excluded_count[ER_Last];
+//! Earliest date found in the log. Negative until a first entry is seen.
+static int EarliestDate=-1;
+//! The earliest date in time format.
+static struct tm EarliestDateTime;
+//! Latest date found in the log. Negative until a first entry is seen.
+static int LatestDate=-1;
+//! The latest date in time format.
+static struct tm LatestDateTime;
+
/*!
-Read the log files.
+ * Read from standard input.
+ *
+ * This is the Read callback installed by Stdin_Open().
+ *
+ * \param Data The file object. It is used as a FILE pointer; Stdin_Open()
+ * stores \c stdin there.
+ * \param Buffer The buffer to store the data read.
+ * \param Size How many bytes to read.
+ *
+ * \return The number of bytes read. It may be less than \a Size.
+ */
+static int Stdin_Read(void *Data,void *Buffer,int Size)
+{
+ return(fread(Buffer,1,Size,(FILE *)Data)); // element size is 1 so the item count returned by fread() is a byte count
+}
-\param Filter The filtering parameters for the file to load.
+/*!
+ * Check if end of file is reached.
+ *
+ * This is the Eof callback installed by Stdin_Open().
+ *
+ * \param Data The file object. It is used as a FILE pointer.
+ *
+ * \return The value of feof() for the stream: non-zero if the end of file
+ * is reached, zero otherwise.
+ */
+static int Stdin_Eof(void *Data)
+{
+ return(feof((FILE *)Data));
+}
-\retval 1 Records found.
-\retval 0 No record found.
+/*!
+ * Mimic a close of standard input but do nothing.
+ *
+ * This is the Close callback installed by Stdin_Open(). The stream is
+ * left open and nothing is released by this function.
+ *
+ * \param Data File to close. It is not used.
+ *
+ * \return Always 0. This function cannot fail.
+ */
+static int Stdin_Close(void *Data)
+{
+ return(0); // nothing to close: the stream is left untouched
+}
+
+/*!
+ * Open a file object to read from standard input.
+ *
+ * The last open error is cleared first. The returned object is allocated
+ * with calloc() and its callbacks are bound to the Stdin_* functions of
+ * this module. No rewind callback is provided.
+ *
+ * \return The object to pass to other functions in this module or NULL if
+ * the memory could not be allocated. In that case, the reason is stored with
+ * FileObject_SetLastOpenError().
+ */
+static FileObject *Stdin_Open(void)
+{
+ FileObject *File;
+
+ FileObject_SetLastOpenError(NULL); // forget any error left by a previous open
+ File=calloc(1,sizeof(*File)); // zero filled so any member not set below is NULL/0
+ if (!File)
+ {
+ FileObject_SetLastOpenError(_("Not enough memory"));
+ return(NULL);
+ }
+ File->Data=stdin;
+ File->Read=Stdin_Read;
+ File->Eof=Stdin_Eof;
+ File->Rewind=NULL; // no rewind callback for standard input
+ File->Close=Stdin_Close;
+ return(File);
+}
+
+/*!
+ * Initialize the memory structure needed by LogLine_Parse() to parse
+ * a log line.
+ *
+ * After this call, no log format is selected, the file name is an empty
+ * string and both error counters are zero.
+ *
+ * \param log_line The structure to initialize.
+ */
+void LogLine_Init(struct LogLineStruct *log_line)
+{
+ log_line->current_format=NULL; // no format identified yet
+ log_line->current_format_idx=-1; // LogLine_Parse() treats a negative index as "format not determined"
+ log_line->file_name=""; // replaced by LogLine_File()
+ log_line->successive_errors=0;
+ log_line->total_errors=0;
+}
+
+/*!
+ * Set the name of the log file being parsed.
+ *
+ * The name is only used in the messages produced by LogLine_Parse().
+ *
+ * \param log_line Data structure to parse the log line.
+ * \param file_name The name of the log file being read. The pointer is
+ * stored as is, the string is not copied, so it must remain valid as long
+ * as \a log_line is used.
+ */
+void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
+{
+ log_line->file_name=file_name; // pointer kept, string not duplicated
+}
+
+/*!
+ * Parse the next line from a log file.
+ *
+ * The format currently selected in \a log_line, if any, is tried first.
+ * If it reports RLRC_Unknown, or if no format is selected yet, every
+ * other entry of LogFormats is tried in order and the first one that
+ * doesn't report RLRC_Unknown becomes the current format.
+ *
+ * When no format recognizes the line, the error counters of \a log_line
+ * are incremented and the line is reported as ignored.
+ *
+ * The program exits with EXIT_FAILURE if:
+ * - the successive errors exceed NumLogSuccessiveErrors;
+ * - NumLogTotalErrors is not negative and the total errors exceed it;
+ * - no format has been identified at all once the line is processed;
+ * - the format reports RLRC_InternalError.
+ *
+ * \param log_line A buffer to store the data about the current parsing.
+ * \param log_entry The variable to store the parsed data. It is zeroed
+ * before each parsing attempt.
+ * \param linebuf The text line read from the log file.
+ *
+ * \return The status reported by the ReadEntry() function of the last
+ * format tried. It is RLRC_Unknown if no format could parse the line and
+ * the error limits are not exceeded.
+ */
+enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
+{
+ enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
+ int x;
+
+ if (log_line->current_format) // try the format that is already selected first
+ {
+ memset(log_entry,0,sizeof(*log_entry));
+ log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
+ }
+
+ // find out what line format to use
+ if (log_entry_status==RLRC_Unknown)
+ {
+ for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
+ {
+ if (LogFormats[x]==log_line->current_format) continue; // already tried above
+ memset(log_entry,0,sizeof(*log_entry)); // discard what the previous attempt may have stored
+ log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
+ if (log_entry_status!=RLRC_Unknown)
+ {
+ log_line->current_format=LogFormats[x];
+ log_line->current_format_idx=x;
+ if (debugz>=LogLevel_Process)
+ {
+ /* TRANSLATORS: The argument is the log format name as translated by you. */
+ debuga(__FILE__,__LINE__,_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
+ }
+ break;
+ }
+ }
+ if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) // loop ran to completion: no format recognized the line
+ {
+ if (++log_line->successive_errors>NumLogSuccessiveErrors) {
+ debuga(__FILE__,__LINE__,ngettext("%d consecutive error found in the input log file %s\n",
+ "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
+ exit(EXIT_FAILURE);
+ }
+ if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) { // a negative NumLogTotalErrors skips this check and the count
+ debuga(__FILE__,__LINE__,ngettext("%d error found in the input log file (last in %s)\n",
+ "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
+ exit(EXIT_FAILURE);
+ }
+ debuga(__FILE__,__LINE__,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
+ }
+ else
+ log_line->successive_errors=0; // NOTE(review): only reset here, i.e. when a format was just (re)identified; a line parsed by the already selected format doesn't reset it - confirm this is intended
+ }
+
+ if (log_line->current_format_idx<0 || log_line->current_format==NULL) { // still nothing selected: no line was ever recognized by any format
+ debuga(__FILE__,__LINE__,_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
+ exit(EXIT_FAILURE);
+ }
+ if (log_entry_status==RLRC_InternalError) {
+ debuga(__FILE__,__LINE__,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
+ exit(EXIT_FAILURE);
+ }
+ return(log_entry_status);
+}
+
+/*!
+Read a single log file.
+
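+\param Filter The filtering parameters for the file to load.
+\param arq The log file name to read. A single dash ("-") reads from standard input.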
*/
-int ReadLogFile(struct ReadLogDataStruct *Filter)
+static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
{
longline line;
char *linebuf;
char *str;
- char arq_log[255];
- //char fun[MAXLEN];
- //char elap[255];
- char user[MAX_USER_LEN];
- //char data[255];
- //char ip[60];
char hora[30];
- //char mes[30];
- char tbuf2[128];
char dia[128]="";
- char wuser[MAXLEN];
- char tmp3[MAXLEN];
- char sz_Download_Unsort[20000];
- char start_hour[128];
+ char tmp3[MAXLEN]="";
char download_url[MAXLEN];
char smartfilter[MAXLEN];
- const char *arq;
const char *url;
- int iarq;
- int blen;
int OutputNonZero = REPORT_EVERY_X_LINES ;
int idata=0;
- //int isa_ncols=0,isa_cols[ISACOL_Last];
int x;
int hmr;
int nopen;
int maxopenfiles=MAX_OPEN_USER_FILES;
- int mindate=0;
- int maxdate=0;
- int cstatus;
- int current_format_idx;
- int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
unsigned long int recs1=0UL;
unsigned long int recs2=0UL;
- long int totregsl=0;
- long int totregsg=0;
- long int totregsx=0;
- //long long int iyear, imonth, iday;
- FILE *fp_in=NULL;
- FILE *fp_log=NULL;
- FILE *fp_Download_Unsort=NULL;
- bool from_pipe;
- bool from_stdin;
+ FileObject *fp_in=NULL;
bool download_flag=false;
bool id_is_ip;
- bool totper=false;
enum ReadLogReturnCodeEnum log_entry_status;
+ enum UserProcessError PUser;
struct stat logstat;
struct getwordstruct gwarea;
- //struct tm tt;
struct userfilestruct *prev_ufile;
struct userinfostruct *uinfo;
- struct userfilestruct *first_user_file=NULL;
struct userfilestruct *ufile;
struct userfilestruct *ufile1;
struct ReadLogStruct log_entry;
- const struct ReadLogProcessStruct *current_format=NULL;
- //time_t tnum;
-
- for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
- tmp3[0]='\0';
- start_hour[0]='\0';
- first_user_file=NULL;
+ struct LogLineStruct log_line;
+ FILE *UseragentLog=NULL;
- snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
+ LogLine_Init(&log_line);
+ LogLine_File(&log_line,arq);
+ for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
+ if (LogFormats[x]->NewFile)
+ LogFormats[x]->NewFile(arq);
- if(DataFile[0]=='\0') {
- denied_open();
- authfail_open();
+ if (arq[0]=='-' && arq[1]=='\0') {
+ fp_in=Stdin_Open();
+ if(debug)
+ debuga(__FILE__,__LINE__,_("Reading access log file: from stdin\n"));
+ } else {
+ if (Filter->DateRange[0]!='\0') {
+ if (stat(arq,&logstat)!=0) {
+ debuga(__FILE__,__LINE__,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
+ } else {
+ struct tm *logtime=localtime(&logstat.st_mtime);
+ if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
+ debuga(__FILE__,__LINE__,_("Ignoring old log file %s\n"),arq);
+ return;
+ }
+ }
+ }
+ fp_in=decomp(arq);
+ if (fp_in==NULL) {
+ debuga(__FILE__,__LINE__,_("Cannot open input log file \"%s\": %s\n"),arq,FileObject_GetLastOpenError());
+ exit(EXIT_FAILURE);
+ }
+ if (debug) debuga(__FILE__,__LINE__,_("Reading access log file: %s\n"),arq);
}
- if ((line=longline_create())==NULL) {
- debuga(_("Not enough memory to read a log file\n"));
- exit(EXIT_FAILURE);
- }
+ download_flag=false;
- for (iarq=0 ; iarq<NAccessLog ; iarq++) {
- arq=AccessLog[iarq];
+ recs1=0UL;
+ recs2=0UL;
- current_format=NULL;
- current_format_idx=-1;
- for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
- if (LogFormats[x]->NewFile)
- LogFormats[x]->NewFile(arq);
+ // pre-read the file only if we have to show stats
+ if (ShowReadStatistics && ShowReadPercent && fp_in->Rewind) {
+ int nread,i;
+ bool skipcr=false;
+ char tmp4[MAXLEN];
- if (arq[0]=='-' && arq[1]=='\0') {
- if(debug)
- debuga(_("Reading access log file: from stdin\n"));
- fp_in=stdin;
- from_stdin=true;
- } else {
- if (Filter->DateRange[0]!='\0') {
- if (stat(arq,&logstat)!=0) {
- debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
+ while ((nread=FileObject_Read(fp_in,tmp4,sizeof(tmp4)))>0) {
+ for (i=0 ; i<nread ; i++)
+ if (skipcr) {
+ if (tmp4[i]!='\n' && tmp4[i]!='\r') {
+ skipcr=false;
+ }
} else {
- struct tm *logtime=localtime(&logstat.st_mtime);
- if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
- debuga(_("Ignoring old log file %s\n"),arq);
- continue;
+ if (tmp4[i]=='\n' || tmp4[i]=='\r') {
+ skipcr=true;
+ recs1++;
}
}
- }
- fp_in=decomp(arq,&from_pipe);
- if(fp_in==NULL) {
- debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
- exit(EXIT_FAILURE);
- }
- if(debug) debuga(_("Reading access log file: %s\n"),arq);
- from_stdin=false;
}
+ FileObject_Rewind(fp_in);
+ printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
+ putchar('\r');
+ fflush( stdout ) ;
+ }
- download_flag=false;
-
- recs1=0UL;
- recs2=0UL;
+ if ((line=longline_create())==NULL) {
+ debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),arq);
+ exit(EXIT_FAILURE);
+ }
- // pre-read the file only if we have to show stats
- if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
- size_t nread,i;
- bool skipcr=false;
- char tmp4[MAXLEN];
+ while ((linebuf=longline_read(fp_in,line))!=NULL) {
+ lines_read++;
- while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
- for (i=0 ; i<nread ; i++)
- if (skipcr) {
- if (tmp4[i]!='\n' && tmp4[i]!='\r') {
- skipcr=false;
- }
- } else {
- if (tmp4[i]=='\n' || tmp4[i]=='\r') {
- skipcr=true;
- recs1++;
- }
- }
+ recs2++;
+ if (ShowReadStatistics && --OutputNonZero<=0) {
+ if (recs1>0) {
+ double perc = recs2 * 100. / recs1 ;
+ printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
+ } else {
+ printf(_("SARG: Records in file: %lu"),recs2);
}
- rewind(fp_in);
- printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
putchar('\r');
- fflush( stdout ) ;
+ fflush (stdout);
+ OutputNonZero = REPORT_EVERY_X_LINES ;
}
- longline_reset(line);
-
- while ((linebuf=longline_read(fp_in,line))!=NULL) {
- blen=strlen(linebuf);
-
-#if 0
- if (ilf==ILF_Unknown) {
- if(strncmp(linebuf,"#Software: Mic",14) == 0) {
- if (debug) {
- fixendofline(linebuf);
- debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
- }
- ilf=ILF_Isa;
- ilf_count[ilf]++;
- continue;
- }
+ /*
+ The following checks are retained here as I don't know to
+ what format they apply. They date back to pre 2.4 versions.
+ */
+ //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
+ if(strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
+ excluded_count[ER_IncompleteQuery]++;
+ continue;
+ }
+ if(strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
+ excluded_count[ER_LogfileTurnedOver]++;
+ continue;
+ }
- if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
- if (getperiod_fromsarglog(arq,&period)<0) {
- debuga(_("The name of the file is invalid: %s\n"),arq);
- exit(EXIT_FAILURE);
- }
- ilf=ILF_Sarg;
- ilf_count[ilf]++;
- continue;
+ // exclude_string
+ if(ExcludeString[0] != '\0') {
+ bool exstring=false;
+ getword_start(&gwarea,ExcludeString);
+ while(strchr(gwarea.current,':') != 0) {
+ if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
+ debuga(__FILE__,__LINE__,_("Invalid record in exclusion string\n"));
+ exit(EXIT_FAILURE);
}
- }
-#endif
-
- recs2++;
- if (ShowReadStatistics && --OutputNonZero<=0) {
- if (recs1>0) {
- double perc = recs2 * 100. / recs1 ;
- printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
- } else {
- printf(_("SARG: Records in file: %lu"),recs2);
+ if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
+ exstring=true;
+ break;
}
- putchar('\r');
- fflush (stdout);
- OutputNonZero = REPORT_EVERY_X_LINES ;
}
-
- /*
- The following checks are retained here as I don't know to
- what format they apply. They date back to pre 2.4 versions.
- */
- //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
- if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query
- if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog
- if(linebuf[0] == ' ') continue;
-
- // exclude_string
- if(ExcludeString[0] != '\0') {
- bool exstring=false;
- getword_start(&gwarea,ExcludeString);
- while(strchr(gwarea.current,':') != 0) {
- if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
- debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
- exit(EXIT_FAILURE);
- }
- if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
- exstring=true;
- break;
- }
- }
- if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
- exstring=true;
- if(exstring) continue;
+ if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
+ exstring=true;
+ if(exstring) {
+ excluded_count[ER_ExcludeString]++;
+ continue;
}
+ }
- totregsl++;
- if(debugm)
- printf("BUF=%s\n",linebuf);
+ totregsl++;
+ if (debugz>=LogLevel_Data)
+ printf("BUF=%s\n",linebuf);
- // process the line
- log_entry_status=RLRC_Unknown;
- memset(&log_entry,0,sizeof(log_entry));
- if (current_format) {
- log_entry_status=current_format->ReadEntry(linebuf,&log_entry);
- }
+ // process the line
+ log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
+ if (log_entry_status==RLRC_Unknown)
+ {
+ excluded_count[ER_UnknownFormat]++;
+ continue;
+ }
+ if (log_entry_status==RLRC_Ignore) {
+ excluded_count[ER_FormatData]++;
+ continue;
+ }
+ format_count[log_line.current_format_idx]++;
- // find out what line format to use
- if (log_entry_status==RLRC_Unknown) {
- x=-1;
- while (log_entry_status==RLRC_Unknown && x<(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
- x++;
- if (LogFormats[x]==current_format) continue;
- memset(&log_entry,0,sizeof(log_entry));
- log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry);
- }
- if (x<0 || x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
- debuga(_("Unknown line format found in input log file %s\n"),arq);
- exit(EXIT_FAILURE);
- }
- current_format=LogFormats[x];
- current_format_idx=x;
- if (debugz) {
- /* TRANSLATORS: The argument is the log format name as translated by you. */
- debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq);
- }
+ if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
+ if(access(ParsedOutputLog,R_OK) != 0) {
+ my_mkdir(ParsedOutputLog);
}
- if (log_entry_status==RLRC_Ignore) {
- continue;
- }
- if (current_format_idx<0 || current_format==NULL) {
- debuga(_("Sarg couldn't determine the format of the input log file %s\n"),arq);
+ if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
+ debuga(__FILE__,__LINE__,_("Path too long: "));
+ debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
exit(EXIT_FAILURE);
}
- if (log_entry_status==RLRC_InternalError) {
- debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq);
+ if((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
+ debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
exit(EXIT_FAILURE);
}
- format_count[current_format_idx]++;
-
- if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) {
- if(access(ParsedOutputLog,R_OK) != 0) {
- my_mkdir(ParsedOutputLog);
- }
- if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
- debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
- exit(EXIT_FAILURE);
- }
- if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
- debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
- exit(EXIT_FAILURE);
- }
- fputs("*** SARG Log ***\n",fp_log);
- }
-
-#if 0
- if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
- getword_start(&gwarea,linebuf);
- if (getword(data,sizeof(data),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken time in your access.log file\n"));
- exit(EXIT_FAILURE);
- }
- if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
- strcpy(ip,data);
- log_entry.Ip=ip;
- if(squid24) {
- if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- } else {
- if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- }
- log_entry.User=user;
- if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
- getword(fun,sizeof(fun),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken url in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_skip(MAXLEN,&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
- if (getword(code,sizeof(code),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- } else {
- if (getword(code,sizeof(code),&gwarea,'\0')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- }
-
- if ((str = strchr(code, ':')) != NULL)
- *str = '/';
- log_entry.HttpCode=code;
-
- ilf=ILF_Common;
- ilf_count[ilf]++;
-
- getword_start(&gwarea,data+1);
- if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- getword_start(&gwarea,data);
- if (getword_atoll(&iday,&gwarea,'/')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(mes,sizeof(mes),&gwarea,'/')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&iyear,&gwarea,'/')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
-
- imonth=month2num(mes)+1;
- idata=builddia(iday,imonth,iyear);
- computedate(iyear,imonth,iday,&tt);
- if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
- tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
- debuga(_("Invalid time found in %s\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.EntryTime=&tt;
- }
-
- if(ilf==ILF_Unknown || ilf==ILF_Squid) {
- if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
- if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
- debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if(strlen(elap) < 1) continue;
- log_entry.ElapsedTime=atol(elap);
- if (getword(ip,sizeof(ip),&gwarea,' ')<0){
- debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.Ip=ip;
- if (getword(code,sizeof(code),&gwarea,' ')<0){
- debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.HttpCode=code;
- if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
- debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(fun,sizeof(fun),&gwarea,' ')<0){
- debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0){
- debuga(_("Maybe you have a broken url in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(user,sizeof(user),&gwarea,' ')<0){
- debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.User=user;
- ilf=ILF_Squid;
- ilf_count[ilf]++;
-
- tnum=atoi(data);
- log_entry.EntryTime=localtime(&tnum);
- if (log_entry.EntryTime == NULL) {
- debuga(_("Cannot convert the timestamp from the squid log file\n"));
- exit(EXIT_FAILURE);
- }
-
- strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
+ fputs("*** SARG Log ***\n",fp_log);
+ }
- idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
- }
- }
- if (ilf==ILF_Sarg) {
- getword_start(&gwarea,linebuf);
- if (getword(data,sizeof(data),&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(user,sizeof(user),&gwarea,'\t')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.User=user;
- if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.Ip=ip;
- if (getword_ptr(linebuf,&log_entry.Url,&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(code,sizeof(code),&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.HttpCode=code;
- if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- getword_start(&gwarea,data);
- if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&iyear,&gwarea,'\0')<0){
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- idata=builddia(iday,imonth,iyear);
- computedate(iyear,imonth,iday,&tt);
- if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
- tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
- debuga(_("Invalid time found in %s\n"),arq);
- exit(EXIT_FAILURE);
- }
- log_entry.EntryTime=&tt;
- }
- if (ilf==ILF_Isa) {
- if (linebuf[0] == '#') {
- int ncols,cols[ISACOL_Last];
-
- fixendofline(linebuf);
- getword_start(&gwarea,linebuf);
- // remove the #Fields: column at the beginning of the line
- if (getword_skip(1000,&gwarea,' ')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
- ncols=0;
- while(gwarea.current[0] != '\0') {
- if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
- if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
- if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
- if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
- if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
- if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
- if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
- if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
- ncols++;
- }
- if (cols[ISACOL_Ip]>=0) {
- isa_ncols=ncols;
- for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
- isa_cols[ncols]=cols[ncols];
- }
- continue;
- }
- if (!isa_ncols) continue;
- getword_start(&gwarea,linebuf);
- for (x=0 ; x<isa_ncols ; x++) {
- if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
- debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (x==isa_cols[ISACOL_Ip]) {
- if (strlen(str)>=sizeof(ip)) {
- debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- strcpy(ip,str);
- log_entry.Ip=ip;
- } else if (x==isa_cols[ISACOL_UserName]) {
- if (strlen(str)>=sizeof(user)) {
- debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- strcpy(user,str);
- log_entry.User=user;
- } else if (x==isa_cols[ISACOL_Date]) {
- if (strlen(str)>=sizeof(data)) {
- debuga(_("Maybe you have a broken date in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- strcpy(data,str);
- } else if (x==isa_cols[ISACOL_Time]) {
- if (strlen(str)>=sizeof(hora)) {
- debuga(_("Maybe you have a broken time in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- strcpy(hora,str);
- } else if (x==isa_cols[ISACOL_TimeTaken]) {
- log_entry.ElapsedTime=atol(str);
- } else if (x==isa_cols[ISACOL_Bytes]) {
- log_entry.DataSize=atoll(str);
- } else if (x==isa_cols[ISACOL_Uri]) {
- log_entry.Url=str;
- } else if (x==isa_cols[ISACOL_Status]) {
- if (strlen(str)>=sizeof(code)) {
- debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- strcpy(code,str);
- }
- }
+ if (log_entry.Ip==NULL) {
+ debuga(__FILE__,__LINE__,_("Unknown input log file format: no IP addresses\n"));
+ break;
+ }
+ if (log_entry.User==NULL) {
+ debuga(__FILE__,__LINE__,_("Unknown input log file format: no user\n"));
+ break;
+ }
+ if (log_entry.Url==NULL) {
+ debuga(__FILE__,__LINE__,_("Unknown input log file format: no URL\n"));
+ break;
+ }
- if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
- static char valcode[12];
- sprintf(valcode,"DENIED/%s",code);
- log_entry.HttpCode=valcode;
- } else {
- log_entry.HttpCode=code;
- }
- getword_start(&gwarea,data);
- if (getword_atoll(&iyear,&gwarea,'-')<0){
- debuga(_("Maybe you have a broken year in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&imonth,&gwarea,'-')<0){
- debuga(_("Maybe you have a broken month in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
- if (getword_atoll(&iday,&gwarea,'\0')<0){
- debuga(_("Maybe you have a broken day in your %s file\n"),arq);
- exit(EXIT_FAILURE);
- }
+ idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
+ if (debugz>=LogLevel_Data)
+ printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
- idata=builddia(iday,imonth,iyear);
- computedate(iyear,imonth,iday,&tt);
- if (isa_cols[ISACOL_Time]>=0) {
- if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
- tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
- debuga(_("Invalid time found in %s\n"),arq);
- exit(EXIT_FAILURE);
- }
- }
- log_entry.EntryTime=&tt;
- }
-#endif
- if (log_entry.Ip==NULL) {
- debuga(_("Unknown input log file format: no IP addresses\n"));
- break;
- }
- if (log_entry.User==NULL) {
- debuga(_("Unknown input log file format: no user\n"));
- break;
- }
- if (log_entry.Url==NULL) {
- debuga(_("Unknown input log file format: no URL\n"));
- break;
+ if (EarliestDate<0 || idata<EarliestDate) {
+ EarliestDate=idata;
+ memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
+ }
+ if (LatestDate<0 || idata>LatestDate) {
+ LatestDate=idata;
+ memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
+ }
+ if(Filter->DateRange[0] != '\0'){
+ if(idata < dfrom || idata > duntil) {
+ excluded_count[ER_OutOfDateRange]++;
+ continue;
}
+ }
- if(debugm)
- printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
+ // Record only hours usage which is required
+ if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) {
+ excluded_count[ER_OutOfWDayRange]++;
+ continue;
+ }
- if(Filter->DateRange[0] != '\0'){
- if(idata < dfrom || idata > duntil) continue;
- }
+ if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) {
+ excluded_count[ER_OutOfHourRange]++;
+ continue;
+ }
- // Record only hours usage which is required
- if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
+ PUser=process_user(&log_entry.User,log_entry.Ip,&id_is_ip);
+ switch (PUser)
+ {
+ case USERERR_NoError:
+ break;
+ case USERERR_NameTooLong:
+ if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
+ excluded_count[ER_UserNameTooLong]++;
+ totregsx++;
continue;
-
- if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
+ case USERERR_Excluded:
+ excluded_count[ER_User]++;
continue;
-
-
- if(strlen(log_entry.User) > MAX_USER_LEN) {
- if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
- totregsx++;
+ case USERERR_InvalidChar:
+ excluded_count[ER_InvalidUserChar]++;
continue;
- }
-
- // include_users
- if(IncludeUsers[0] != '\0') {
- snprintf(val1,sizeof(val1),":%s:",log_entry.User);
- if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
- continue;
- }
-
- if(vercode(log_entry.HttpCode)) {
- if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
+ case USERERR_EmptyUser:
+ excluded_count[ER_NoUser]++;
+ continue;
+ case USERERR_SysUser:
+ excluded_count[ER_SysUser]++;
+ continue;
+ case USERERR_Ignored:
+ excluded_count[ER_IgnoredUser]++;
totregsx++;
continue;
- }
-
- if(testvaliduserchar(log_entry.User))
+ case USERERR_Untracked:
+ excluded_count[ER_UntrackedUser]++;
continue;
+ }
- // replace any tab by a single space
- for (str=log_entry.Url ; *str ; str++)
- if (*str=='\t') *str=' ';
- for (str=log_entry.HttpCode ; *str ; str++)
- if (*str=='\t') *str=' ';
-
- if (current_format!=&ReadSargLog) {
- /*
- The full URL is not saved in sarg log. There is no point in testing the URL to detect
- a downloaded file.
- */
- download_flag=is_download_suffix(log_entry.Url);
- if (download_flag) {
- safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
- download_count++;
- }
- } else
- download_flag=false;
-
- url=process_url(log_entry.Url,LongUrl);
- if (!url || url[0] == '\0') continue;
-
- if(addr[0] != '\0'){
- if(strcmp(addr,log_entry.Ip)!=0) continue;
- }
- if(Filter->HostFilter) {
- if(!vhexclude(url)) {
- if (debugm) printf(_("Excluded site: %s\n"),url);
- totregsx++;
- continue;
- }
- }
+ if(vercode(log_entry.HttpCode)) {
+ if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded code: %s\n"),log_entry.HttpCode);
+ excluded_count[ER_HttpCode]++;
+ totregsx++;
+ continue;
+ }
- if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
- hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
- if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
- }
+ // replace any tab by a single space
+ for (str=log_entry.Url ; *str ; str++)
+ if (*str=='\t') *str=' ';
+ for (str=log_entry.HttpCode ; *str ; str++)
+ if (*str=='\t') *str=' ';
- if(site[0] != '\0'){
- if(strstr(url,site)==0) continue;
+ if (log_line.current_format!=&ReadSargLog) {
+ /*
+ The full URL is not saved in sarg log. There is no point in testing the URL to detect
+ a downloaded file.
+ */
+ download_flag=is_download_suffix(log_entry.Url);
+ if (download_flag) {
+ safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
}
+ } else
+ download_flag=false;
- if(UserIp) {
- log_entry.User=log_entry.Ip;
- id_is_ip=true;
- } else {
- id_is_ip=false;
- if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
- if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
- log_entry.User=log_entry.Ip;
- id_is_ip=true;
- }
- if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
- continue;
- if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
- log_entry.User="everybody";
- } else {
- strlow(log_entry.User);
- if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
- if ((str=strchr(user,'+'))!=NULL || (str=strchr(user,'\\'))!=NULL || (str=strchr(user,'_'))!=NULL) {
- log_entry.User=str+1;
- }
- }
- }
- }
+ url=process_url(log_entry.Url,LongUrl);
+ if (!url || url[0] == '\0') {
+ excluded_count[ER_NoUrl]++;
+ continue;
+ }
- if(us[0] != '\0'){
- if(strcmp(log_entry.User,us)!=0) continue;
+ if(addr[0] != '\0'){
+ if(strcmp(addr,log_entry.Ip)!=0) {
+ excluded_count[ER_UntrackedIpAddr]++;
+ continue;
}
-
- if(Filter->SysUsers) {
- snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
- if(strstr(userfile, wuser) == 0)
- continue;
+ }
+ if(Filter->HostFilter) {
+ if(!vhexclude(url)) {
+ if (debugz>=LogLevel_Data) debuga(__FILE__,__LINE__,_("Excluded site: %s\n"),url);
+ excluded_count[ER_Url]++;
+ totregsx++;
+ continue;
}
+ }
- if(Filter->UserFilter) {
- if(!vuexclude(log_entry.User)) {
- if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
- totregsx++;
- continue;
- }
+ if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
+ hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
+ if(hmr < Filter->StartTime || hmr > Filter->EndTime) {
+ excluded_count[ER_OutOfTimeRange]++;
+ continue;
}
+ }
- if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
- log_entry.User[0]==' ' || log_entry.User[0]==':')))
+ if(site[0] != '\0'){
+ if(strstr(url,site)==0) {
+ excluded_count[ER_UntrackedUrl]++;
continue;
+ }
+ }
- if (log_entry.DataSize<0) log_entry.DataSize=0;
+ if (log_entry.DataSize<0) log_entry.DataSize=0;
- if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
- if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
- log_entry.ElapsedTime=0;
- }
+ if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
+ if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
+ log_entry.ElapsedTime=0;
+ }
- if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
- fixendofline(str);
- snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
- } else strcpy(smartfilter,"\"\"");
+ if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
+ fixendofline(str);
+ snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
+ } else strcpy(smartfilter,"\"\"");
- nopen=0;
- prev_ufile=NULL;
- for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
- prev_ufile=ufile;
- if (ufile->file) nopen++;
- }
+ nopen=0;
+ prev_ufile=NULL;
+ for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
+ prev_ufile=ufile;
+ if (ufile->file) nopen++;
+ }
+ if (!ufile) {
+ ufile=malloc(sizeof(*ufile));
if (!ufile) {
- ufile=malloc(sizeof(*ufile));
- if (!ufile) {
- debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
- exit(EXIT_FAILURE);
- }
- memset(ufile,0,sizeof(*ufile));
+ debuga(__FILE__,__LINE__,_("Not enough memory to store the user %s\n"),log_entry.User);
+ exit(EXIT_FAILURE);
+ }
+ memset(ufile,0,sizeof(*ufile));
+ ufile->next=first_user_file;
+ first_user_file=ufile;
+ /*
+ * This id_is_ip stuff is just to store the string only once if the user is
+ * identified by its IP address instead of a distinct ID and IP address.
+ */
+ uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
+ ufile->user=uinfo;
+ nusers++;
+ } else {
+ if (prev_ufile) {
+ prev_ufile->next=ufile->next;
ufile->next=first_user_file;
first_user_file=ufile;
- uinfo=userinfo_create(log_entry.User);
- ufile->user=uinfo;
- uinfo->id_is_ip=id_is_ip;
- } else {
- if (prev_ufile) {
- prev_ufile->next=ufile->next;
- ufile->next=first_user_file;
- first_user_file=ufile;
- }
}
+ }
#ifdef ENABLE_DOUBLE_CHECK_DATA
+ if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
ufile->user->nbytes+=log_entry.DataSize;
ufile->user->elap+=log_entry.ElapsedTime;
+ }
#endif
- if (ufile->file==NULL) {
- if (nopen>=maxopenfiles) {
- x=0;
- for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
- if (ufile1->file!=NULL) {
- if (x>=maxopenfiles) {
- if (fclose(ufile1->file)==EOF) {
- debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
- exit(EXIT_FAILURE);
- }
- ufile1->file=NULL;
+ if (ufile->file==NULL) {
+ if (nopen>=maxopenfiles) {
+ x=0;
+ for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
+ if (ufile1->file!=NULL) {
+ if (x>=maxopenfiles) {
+ if (fclose(ufile1->file)==EOF) {
+ debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
+ exit(EXIT_FAILURE);
}
- x++;
+ ufile1->file=NULL;
}
+ x++;
}
}
- if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
- debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
- exit(EXIT_FAILURE);
- }
- if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
- debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
- exit (1);
- }
}
-
- strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
- strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
-
- if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
- log_entry.Ip,url,(uint64_t)log_entry.DataSize,
- log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
- debuga(_("Write error in the log file of user %s\n"),log_entry.User);
+ if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
+ debuga(__FILE__,__LINE__,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
exit(EXIT_FAILURE);
}
-
- if (fp_log && current_format!=&ReadSargLog) {
- fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
- log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
- log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
+ if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
+ debuga(__FILE__,__LINE__,_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
+ exit(EXIT_FAILURE);
}
+ }
- totregsg++;
+ strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
+ strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
- if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
- ndownload = 1;
+ if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
+ log_entry.Ip,url,(uint64_t)log_entry.DataSize,
+ log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
+ debuga(__FILE__,__LINE__,_("Write error in the log file of user %s\n"),log_entry.User);
+ exit(EXIT_FAILURE);
+ }
+ records_kept++;
- if ( ! fp_Download_Unsort ) {
- if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
- debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
- exit (1);
- }
- }
- fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,
- log_entry.User,log_entry.Ip,download_url);
- }
+ if (fp_log && log_line.current_format!=&ReadSargLog) {
+ fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
+ log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
+ log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
+ }
- denied_write(&log_entry);
- authfail_write(&log_entry);
+ totregsg++;
- if (current_format!=&ReadSargLog) {
- if(!totper || idata<mindate){
- mindate=idata;
- memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
- strcpy(start_hour,tbuf2);
- }
- if (!totper || idata>maxdate) {
- maxdate=idata;
- memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
- }
- totper=true;
- }
+ denied_write(&log_entry);
+ authfail_write(&log_entry);
+ if (download_flag) download_write(&log_entry,download_url);
+ if (log_entry.UserAgent)
+ {
+ if (!UseragentLog)
+ UseragentLog=UserAgent_Open();
+ UserAgent_Write(UseragentLog,&log_entry.EntryTime,log_entry.Ip,log_entry.User,log_entry.UserAgent);
+ }
- if(debugm){
- printf("IP=\t%s\n",log_entry.Ip);
- printf("USER=\t%s\n",log_entry.User);
- printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
- printf("DATE=\t%s\n",dia);
- printf("TIME=\t%s\n",hora);
- //printf("FUNC=\t%s\n",fun);
- printf("URL=\t%s\n",url);
- printf("CODE=\t%s\n",log_entry.HttpCode);
- printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
+ if (log_line.current_format!=&ReadSargLog) {
+ if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
+ mindate=idata;
+ memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
+ }
+ if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
+ maxdate=idata;
+ memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
}
}
- if (!from_stdin) {
- if (from_pipe)
- pclose(fp_in);
- else {
- fclose(fp_in);
- if (ShowReadStatistics) {
- if (ShowReadPercent)
- printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
- else
- printf(_("SARG: Records in file: %lu\n"),recs2);
- }
- }
+ if (debugz>=LogLevel_Data){
+ printf("IP=\t%s\n",log_entry.Ip);
+ printf("USER=\t%s\n",log_entry.User);
+ printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
+ printf("DATE=\t%s\n",dia);
+ printf("TIME=\t%s\n",hora);
+ //printf("FUNC=\t%s\n",fun);
+ printf("URL=\t%s\n",url);
+ printf("CODE=\t%s\n",log_entry.HttpCode);
+ printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
}
}
longline_destroy(&line);
+ if (FileObject_Close(fp_in)) {
+ debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),arq,FileObject_GetLastCloseError());
+ exit(EXIT_FAILURE);
+ }
+ if (UseragentLog) fclose(UseragentLog);
+ if (ShowReadStatistics) {
+ if (ShowReadPercent)
+ printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
+ else
+ printf(_("SARG: Records in file: %lu\n"),recs2);
+ }
+}
+
+/*!
+ * Report how many log entries were rejected for one given reason.
+ *
+ * Nothing is displayed when the counter of that reason is not positive.
+ *
+ * \param Explain The text describing the reason. The callers pass a translated string.
+ * \param Reason The reason whose counter is read from excluded_count.
+ */
+static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
+{
+	if (excluded_count[Reason]<=0) return;
+	debuga(__FILE__,__LINE__," %s: %lu\n",Explain,excluded_count[Reason]);
+}
+
+/*!
+Read every input log file and finalize the files produced while reading them.
+
+The per format and per exclusion reason counters are reset, then each file of
+the AccessLog list is passed to ReadOneLogFile(). Afterwards, if the parsed
+output log (fp_log) is open, it is closed, renamed after the start and end of
+the period and, if requested, processed by the ParsedOutputLogCompress command.
+Then denied_close(), authfail_close() and download_close() are called and the
+per user files are closed and their list is freed.
+
+The program exits if a file cannot be closed properly, if a path or a command
+is too long or if the compression command fails. A failure to rename the parsed
+log is only reported.
+
+When \c debug is set, the number of records read, written and excluded is
+displayed followed by the detail of the exclusion reasons and of the log
+formats encountered.
+
+\param Filter The filtering parameters for the file to load.
+
+\retval 1 Records found.
+\retval 0 No record found.
+*/
+int ReadLogFile(struct ReadLogDataStruct *Filter)
+{
+	int x;
+	int cstatus;
+	struct userfilestruct *ufile;
+	struct userfilestruct *ufile1;
+	FileListIterator FIter;
+	const char *file;
+
+	// start with clean statistics and an empty list of user files
+	for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
+	for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
+	first_user_file=NULL;
+
+	if (!dataonly) {
+		denied_open();
+		authfail_open();
+		download_open();
+	}
+
+	// process each input log file in turn
+	FIter=FileListIter_Open(AccessLog);
+	while ((file=FileListIter_Next(FIter))!=NULL)
+		ReadOneLogFile(Filter,file);
+	FileListIter_Close(FIter);
+
 	if(fp_log != NULL) {
-		char end_hour[128];
 		char val2[40];
-		char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
-
-		fclose(fp_log);
-		safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
-		strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
-		strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
-		if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
-			debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
+		char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
+
+		if (fclose(fp_log)==EOF) {
+			debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		// the final name of the parsed log contains the start and end time of the period
+		strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
+		strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
+		if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
+			debuga(__FILE__,__LINE__,_("Path too long: "));
+			debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
 			exit(EXIT_FAILURE);
 		}
-		if (rename(arq_log,val4)) {
-			debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
+		if (rename(SargLogFile,val4)) {
+			debuga(__FILE__,__LINE__,_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
 		} else {
-			strcpy(arq_log,val4);
+			strcpy(SargLogFile,val4);
 			if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
 				/*
 				No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
 				necessary around the command name, put them in the configuration file.
 				*/
-				if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
-					debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
+				if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
+					debuga(__FILE__,__LINE__,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
 					exit(EXIT_FAILURE);
 				}
 				cstatus=system(val1);
 				if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
-					debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
-					debuga(_("command: %s\n"),val1);
+					debuga(__FILE__,__LINE__,_("command return status %d\n"),WEXITSTATUS(cstatus));
+					debuga(__FILE__,__LINE__,_("command: %s\n"),val1);
 					exit(EXIT_FAILURE);
 				}
 			}
 		}
 		if(debug)
-			debuga(_("Sarg parsed log saved as %s\n"),arq_log);
+			debuga(__FILE__,__LINE__,_("Sarg parsed log saved as %s\n"),SargLogFile);
 	}
 	denied_close();
 	authfail_close();
-	if (fp_Download_Unsort) fclose (fp_Download_Unsort);
+	download_close();
 	for (ufile=first_user_file ; ufile ; ufile=ufile1) {
 		ufile1=ufile->next;
-		if (ufile->file!=NULL) fclose(ufile->file);
+		if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
+			debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
+			exit(EXIT_FAILURE);
+		}
 		free(ufile);
 	}
 	if (debug) {
-		int totalcount=0;
-
-		debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
+		unsigned long int totalcount=0;
+
+		debuga(__FILE__,__LINE__,_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
+
+		/*
+		 * Search for a reason with a non-zero counter. The scan skips the counters
+		 * equal to zero so that x is negative if and only if nothing was excluded. In
+		 * that case, the title is not displayed as no reason would follow it.
+		 */
+		for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]==0 ; x--);
+		if (x>=0) {
+			debuga(__FILE__,__LINE__,_("Reasons for excluded entries:\n"));
+			DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
+			DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
+			DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
+			DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
+			DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
+			DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
+			DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
+			DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
+			DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
+			DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
+			DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
+			DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
+			DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
+			DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
+			DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
+			DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
+			DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
+			DisplayExcludeCount(_("No user in entry"),ER_NoUser);
+			DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
+			DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
+			DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
+		}
 		for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
 			if (format_count[x]>0) {
 				/* TRANSLATORS: It displays the number of lines found in the input log files
 				 * for each supported log format. The log format name is the %s and is a string
 				 * you translate somewhere else. */
-				debuga(_("%s: %d entries\n"),_(LogFormats[x]->Name),format_count[x]);
+				debuga(__FILE__,__LINE__,_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
 				totalcount+=format_count[x];
 			}
 		}
 		if (totalcount==0 && totregsg)
-			debuga(_("Log with invalid format\n"));
-	}
-
-	if(debugz) {
-		debugaz(_("date=%s\n"),dia);
-		debugaz(_("period=%s\n"),period.text);
+			debuga(__FILE__,__LINE__,_("Log with invalid format\n"));
 	}
 	return((totregsg!=0) ? 1 : 0);
 }
+
+/*!
+ * Give the times stored along with the earliest and the latest dates
+ * (EarliestDate and LatestDate).
+ *
+ * A negative date means that no time is stored for it. The corresponding
+ * output structure is then filled with zeros.
+ *
+ * \param Start The structure to fill with the time of the earliest date.
+ * \param End The structure to fill with the time of the latest date.
+ *
+ * \return \c True if at least one of the two times is available.
+ */
+bool GetLogPeriod(struct tm *Start,struct tm *End)
+{
+	const bool HaveStart=(EarliestDate>=0);
+	const bool HaveEnd=(LatestDate>=0);
+
+	if (HaveStart)
+		memcpy(Start,&EarliestDateTime,sizeof(*Start));
+	else
+		memset(Start,0,sizeof(*Start));
+
+	if (HaveEnd)
+		memcpy(End,&LatestDateTime,sizeof(*End));
+	else
+		memset(End,0,sizeof(*End));
+
+	return(HaveStart || HaveEnd);
+}