/*
* SARG Squid Analysis Report Generator http://sarg.sourceforge.net
- * 1998, 2012
+ * 1998, 2015
*
* SARG donations:
* please look at http://sarg.sourceforge.net/donations.php
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"
// Standard headers are listed after the project headers so that the
// configuration header is still seen first by the compiler.
#include <ctype.h>
#include <limits.h>
+
/*!
Upper bound on the number of columns a "fields" directive may declare.

The value has no special meaning. It only exists to size the
per-column tables with a fixed length.
*/
#define MAX_EXT_COLUMNS 250

/*!
Identifiers of the columns extracted from an extended log file.

The values are used as indexes in ::ExtCols, therefore ::EXTCOL_Last must
stay the last entry as it gives the number of identifiers.
*/
enum ext_col_id
{
	EXTCOL_Ip,        //!< Client address (c-ip or c-dns).
	EXTCOL_UserName,  //!< User name (cs-username).
	EXTCOL_Date,      //!< Date of the entry (date).
	EXTCOL_Time,      //!< Time of the entry (time).
	EXTCOL_TimeTaken, //!< Duration of the request (time-taken).
	EXTCOL_Bytes,     //!< Number of bytes (sc-bytes).
	EXTCOL_Uri,       //!< Requested URI (cs-uri).
	EXTCOL_Status,    //!< Status code (sc-status).
	EXTCOL_Last       //!< Not a column: number of identifiers and "ignored column" marker.
};

//! Number of columns announced by the last "fields" directive.
static int ExtColNumber;
//! Separator character found after each column of the "fields" directive, indexed by column number.
static char ExtColSep[MAX_EXT_COLUMNS];
//! Column number of each ::ext_col_id in the log file or -1 if the column is not present.
static int ExtCols[EXTCOL_Last];
//! \c True once a directive of the extended log format has been recognized in the current file.
static bool InExtLog=false;
/*!
Forget everything learned from the previous file because a new
file is about to be read.

The extended log format is no longer considered as confirmed and the
list of columns is emptied until a new "fields" directive is found.

\param FileName The name of the file being read. It is not used here.
*/
static void ExtLog_NewFile(const char *FileName)
{
	ExtColNumber=0;
	InExtLog=false;
}
+
+/*!
+Parse the "Fields" directive listing the columns in the log. The
+\a columns is a pointer to the first column of the directive.
+
+\return \c True if the fields is valid or false if it could not
+be decoded.
+*/
+static bool ExtLog_Fields(const char *columns)
+{
+ int col;
+ int len;
+ int prefix;
+ int header_start;
+ int header_end;
+ int i;
+ enum ext_col_id col_id;
+ char col_sep;
+ // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
+ const char const *prefixes[]=
+ {
+ "c",
+ "s",
+ "r",
+ "cs",
+ "sc",
+ "sr",
+ "rs",
+ "x",
+ };
+
+ for (i=0 ; i<EXTCOL_Last ; i++) ExtCols[i]=-1;
+
+ col=0;
+ while (*columns) {
+ if (col>=MAX_EXT_COLUMNS) {
+ debuga(__FILE__,__LINE__,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS);
+ exit(EXIT_FAILURE);
+ }
+ prefix=-1;
+ header_start=-1;
+ header_end=-1;
+ for (i=sizeof(prefixes)/sizeof(*prefixes)-1 ; i>=0 ; i--) {
+ len=strlen(prefixes[i]);
+ if (strncasecmp(columns,prefixes[i],len)==0) {
+ if (columns[len]=='-') {
+ prefix=len++;
+ break;
+ } else if (columns[len]=='(') {
+ header_start=len++;
+ break;
+ }
+ }
+ }
+ (void)prefix;//compiler pacifier
+ if (i<0) len=0;
+ for ( ; (unsigned char)columns[len]>' ' ; len++) {//skip a word and accept any separator (tab or space)
+ if (header_start>=0 && columns[len]==')') header_end=len;
+ }
+ (void)header_end;//compiler pacifier
+ col_sep=columns[len];
+ ExtColSep[col]=col_sep;
+
+ // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
+ col_id=EXTCOL_Last;
+ if (len==4) {
+ if (strncasecmp(columns,"c-ip",len)==0 && ExtCols[EXTCOL_Ip]<0) col_id=EXTCOL_Ip;
+ else if (strncasecmp(columns,"date",len)==0) col_id=EXTCOL_Date;
+ else if (strncasecmp(columns,"time",len)==0) col_id=EXTCOL_Time;
+ } else if (len==5) {
+ if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip;
+ } else if (len==6) {
+ if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri;
+ } else if (len==8) {
+ if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes;
+ } else if (len==9) {
+ if (strncasecmp(columns,"sc-status",len)==0) col_id=EXTCOL_Status;
+ } else if (len==10) {
+ if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken;
+ } else if (len==11) {
+ if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName;
+ }
+ if (col_id!=EXTCOL_Last) {
+ ExtCols[col_id]=col;
+ }
+
+ col++;
+ columns+=len;
+ while (*columns && (unsigned char)*columns<=' ') {
+ if (*columns!=col_sep) {
+ debuga(__FILE__,__LINE__,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
+ exit(EXIT_FAILURE);
+ }
+ columns++;
+ }
+ }
+ ExtColNumber=col;
+ return(true);
+}
+
+/*!
+Decode a directive field from the \a Line.
+
+\return RLRC_Ignore if the line is a directive or RLRC_Unknown
+if the line is not a known directive.
+*/
+static enum ReadLogReturnCodeEnum ExtLog_Directive(const char *Line)
+{
+ ++Line;
+ if (strncasecmp(Line,"Version:",8)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"Software:",9)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"Start-Date:",11)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"End-Date:",9)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"Date:",5)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"Remark:",7)==0) return(RLRC_Ignore);
+ if (strncasecmp(Line,"Fields:",7)==0) {
+ Line+=7;
+ while (*Line==' ' || *Line=='\t') Line++;
+ if (!ExtLog_Fields(Line)) return(RLRC_Unknown);
+ return(RLRC_Ignore);
+ }
+ return(RLRC_Unknown);
+}
+
+/*!
+Get the type of the column \a col_num.
+
+\return The type of the column or EXTCOL_Last if
+the column must be ignored.
+*/
+static enum ext_col_id ExtLog_WhichColumn(int col_num)
+{
+ int i;
+
+ for (i=0 ; i<EXTCOL_Last && ExtCols[i]!=col_num ; i++);
+ return(i);
+}
+
+/*!
+Scan through the string of a column.
+
+\param Line The pointer to the beginning of the string.
+\param col The column number.
+*/
+static char *ExtLog_GetString(char *Line,int col,char **End)
+{
+ bool quote;
+ bool dequote;
+
+ //skip opening double quote
+ quote=(*Line=='\"');
+ if (quote) ++Line;
+
+ dequote=false;
+ while (*Line) {
+ if (quote) {
+ if (*Line=='\"') {
+ if (Line[1]!='\"') {
+ if (End) *End=(dequote) ? NULL : Line;
+ Line++;//skip the closing quote
+ quote=false;
+ break;
+ }
+ dequote=true;
+ }
+ } else {
+ if (*Line==ExtColSep[col]) {
+ if (End) *End=Line;
+ break;
+ }
+ }
+ Line++;
+ }
+ if (quote) return(NULL);//missing closing quote.
+ return(Line);
+}
+
/*!
Scan through the date in a column. The date is expected as
year-month-day and it may be enclosed in double quotes.

The year is limited to four digits and the month and day to two digits
each so that the conversion can't overflow. The month must be between 1
and 12 and the day between 1 and 31. The length of the month is not
checked here.

\param Line The pointer to the beginning of the string.
\param Date The structure whose tm_year, tm_mon and tm_mday are set
from the date. It is left untouched if the date is refused.

\return The pointer to the first character after the date (after the
closing quote if the date is quoted) or NULL if the date could not
be decoded.
*/
static char *ExtLog_GetDate(char *Line,struct tm *Date)
{
	bool quote;
	int year;
	int month;
	int day;
	int next;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;

	// sscanf would silently accept leading spaces or a sign
	if (!isdigit((unsigned char)*Line)) return(NULL);
	if (sscanf(Line,"%4d-%2d-%2d%n",&year,&month,&day,&next)!=3) return(NULL);
	if (month<1 || month>12 || day<1 || day>31) return(NULL);
	Line+=next;

	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	Date->tm_year=year-1900;
	Date->tm_mon=month-1;
	Date->tm_mday=day;
	return(Line);
}
+
/*!
Scan through the time in a column. The time may be enclosed in
double quotes.

The accepted forms are hour:minute, hour:minute:second and
hour:minute:second.fraction. The seconds are set to zero if they are
missing and the fraction of a second is skipped. Each number is
limited to two digits so that the conversion can't overflow. The hour
must be between 0 and 23, the minutes between 0 and 59 and the seconds
between 0 and 60 (to accept a leap second).

\param Line The pointer to the beginning of the string.
\param Date The structure whose tm_hour, tm_min and tm_sec are set
from the time. It is left untouched if the time is refused.

\return The pointer to the first character after the time (after the
closing quote if the time is quoted) or NULL if the time could not
be decoded.
*/
static char *ExtLog_GetTime(char *Line,struct tm *Date)
{
	bool quote;
	int hour;
	int minute;
	int second=0;
	int next;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;

	// sscanf would silently accept leading spaces or a sign
	if (!isdigit((unsigned char)*Line)) return(NULL);
	if (sscanf(Line,"%2d:%2d%n",&hour,&minute,&next)!=2) return(NULL);
	Line+=next;

	// optional seconds
	if (Line[0]==':' && isdigit((unsigned char)Line[1])) {
		if (sscanf(Line,":%2d%n",&second,&next)!=1) return(NULL);
		Line+=next;
		// optional fraction of a second, it is not stored
		if (Line[0]=='.') {
			++Line;
			while (isdigit((unsigned char)*Line)) ++Line;
		}
	}
	if (hour<0 || hour>23 || minute<0 || minute>59 || second<0 || second>60) return(NULL);

	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	Date->tm_hour=hour;
	Date->tm_min=minute;
	Date->tm_sec=second;
	return(Line);
}
+
/*!
Scan through an unsigned decimal number in a column. The number
may be enclosed in double quotes.

The scan stops at the first character that is not a digit. If no
digit is found, the value is zero and the returned pointer has not moved
past any digit.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number. Its content is not meaningful if
the function fails.

\return The pointer to the first character after the number (after the
closing quote if the number is quoted) or NULL if the closing quote is
missing or if the number is too big to be stored in \a Value.
*/
static char *ExtLog_GetLongInt(char *Line,long int *Value)
{
	bool quote;
	int digit;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	*Value=0;
	// the cast is required as isdigit is only defined for the values of an unsigned char
	while (isdigit((unsigned char)*Line)) {
		digit=*Line++-'0';
		if (*Value>(LONG_MAX-digit)/10) return(NULL);//the number would overflow
		*Value=*Value*10+digit;
	}
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
+
/*!
Scan through an unsigned decimal number in a column and store it as
a long long integer. The number may be enclosed in double quotes.

The scan stops at the first character that is not a digit. If no
digit is found, the value is zero and the returned pointer has not moved
past any digit.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number. Its content is not meaningful if
the function fails.

\return The pointer to the first character after the number (after the
closing quote if the number is quoted) or NULL if the closing quote is
missing or if the number is too big to be stored in \a Value.
*/
static char *ExtLog_GetLongLongInt(char *Line,long long int *Value)
{
	bool quote;
	int digit;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	*Value=0;
	// the cast is required as isdigit is only defined for the values of an unsigned char
	while (isdigit((unsigned char)*Line)) {
		digit=*Line++-'0';
		if (*Value>(LLONG_MAX-digit)/10) return(NULL);//the number would overflow
		*Value=*Value*10+digit;
	}
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
+
/*!
Terminate the \a string in place and reduce the doubled quotes it contains.

Nothing is done if \a string is NULL.

If \a end_ptr is not NULL, the string is known to contain no doubled
quote and it is only terminated by writing a null character at \a end_ptr.
Therefore, \a end_ptr is the pointer to the character following the last
character of the string.

If \a end_ptr is NULL, the string is copied over itself. Every pair of
double quotes is replaced by a single double quote and the copy stops at
the first double quote that is not followed by another one or at the end
of the buffer.

\param string The string to fix or NULL if no string was parsed.
\param end_ptr Where to terminate the string or NULL if the string
must be searched for doubled quotes.
*/
static void ExtLog_FixString(char *string,char *end_ptr)
{
	char *src;
	char *out;

	if (string==NULL) return;//string not parsed

	if (end_ptr!=NULL) {
		//the end is known and there is nothing to remove from the string
		*end_ptr='\0';
		return;
	}

	out=string;
	for (src=string ; *src!='\0' ; src++) {
		if (*src=='\"') {
			if (src[1]!='\"') break;//closing quote
			src++;//keep only the second quote of the pair
		}
		*out++=*src;
	}
	*out='\0';
}
/*!
*/
static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry)
{
+ int col;
+ enum ext_col_id col_id;
+ char *Ip=NULL;
+ char *IpEnd;
+ char *User=NULL;
+ char *UserEnd;
+ char *UrlEnd;
+ char *HttpCodeEnd;
+
+ // is it a directive
+ if (*Line=='#') {
+ enum ReadLogReturnCodeEnum status=ExtLog_Directive(Line);
+ if (status!=RLRC_Unknown) InExtLog=true;
+ return(status);
+ }
+ if (!InExtLog) return(RLRC_Unknown);
+
+ col=0;
+ while (*Line) {
+ if (col>=ExtColNumber) {
+ debuga(__FILE__,__LINE__,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col,ExtColNumber);
+ return(RLRC_Unknown);
+ }
+ col_id=ExtLog_WhichColumn(col);
+ switch (col_id)
+ {
+ case EXTCOL_Ip:
+ Entry->Ip=Ip=Line;
+ Line=ExtLog_GetString(Line,col,&IpEnd);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_UserName:
+ Entry->User=User=Line;
+ Line=ExtLog_GetString(Line,col,&UserEnd);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Date:
+ Line=ExtLog_GetDate(Line,&Entry->EntryTime);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Time:
+ Line=ExtLog_GetTime(Line,&Entry->EntryTime);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_TimeTaken:
+ Line=ExtLog_GetLongInt(Line,&Entry->ElapsedTime);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Bytes:
+ Line=ExtLog_GetLongLongInt(Line,&Entry->DataSize);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Uri:
+ Entry->Url=Line;
+ Line=ExtLog_GetString(Line,col,&UrlEnd);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Status:
+ Entry->HttpCode=Line;
+ Line=ExtLog_GetString(Line,col,&HttpCodeEnd);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ case EXTCOL_Last://ignored column
+ Line=ExtLog_GetString(Line,col,NULL);
+ if (!Line) return(RLRC_Unknown);
+ break;
+ }
+ if (*Line && *Line!=ExtColSep[col]) return(RLRC_Unknown);
+ while (*Line && *Line==ExtColSep[col]) Line++;
+ col++;
+ }
+ if (col!=ExtColNumber) {
+ debuga(__FILE__,__LINE__,_("Only %d columns in an extended log file format when %d have been announced\n"),col,ExtColNumber);
+ return(RLRC_Unknown);
+ }
+
+ // check the entry time
+ if (mktime(&Entry->EntryTime)==-1) {
+ debuga(__FILE__,__LINE__,_("Invalid date or time found in the extended log file\n"));
+ return(RLRC_InternalError);
+ }
+
+ ExtLog_FixString(Ip,IpEnd);
+ ExtLog_FixString(User,UserEnd);
+ ExtLog_FixString(Entry->Url,UrlEnd);
+ ExtLog_FixString(Entry->HttpCode,HttpCodeEnd);
+
return(RLRC_NoError);
}