2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 Maximum number of columns accepted in an extended log format.
34 The current value is an arbitrary number chosen to have an
37 #define MAX_EXT_COLUMNS 250
48 EXTCOL_Last
//last entry of the list !
51 //! \c True if the extended common log format is confirmed.
52 static bool InExtLog
=false;
53 //! The index of relevant columns in the log file.
54 static int ExtCols
[EXTCOL_Last
];
55 //! The characters to use as column separators (one entry per column).
56 static char ExtColSep
[MAX_EXT_COLUMNS
];
57 //! The number of columns according to the "fields" directive.
58 static int ExtColNumber
;
61 A new file is being read. The name of the file is \a FileName.
63 static void ExtLog_NewFile(const char *FileName
)
70 Parse the "Fields" directive listing the columns in the log. The
71 \a columns is a pointer to the first column of the directive.
73 \return \c True if the fields directive is valid or false if it could not
76 static bool ExtLog_Fields(const char *columns
)
84 enum ext_col_id col_id
;
86 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
87 const char const *prefixes
[]=
99 for (i
=0 ; i
<EXTCOL_Last
; i
++) ExtCols
[i
]=-1;
103 if (col
>=MAX_EXT_COLUMNS
) {
104 debuga(__FILE__
,__LINE__
,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS
);
110 for (i
=sizeof(prefixes
)/sizeof(*prefixes
)-1 ; i
>=0 ; i
--) {
111 len
=strlen(prefixes
[i
]);
112 if (strncasecmp(columns
,prefixes
[i
],len
)==0) {
113 if (columns
[len
]=='-') {
116 } else if (columns
[len
]=='(') {
122 (void)prefix
;//compiler pacifier
124 for ( ; (unsigned char)columns
[len
]>' ' ; len
++) {//skip a word and accept any separator (tab or space)
125 if (header_start
>=0 && columns
[len
]==')') header_end
=len
;
127 (void)header_end
;//compiler pacifier
128 col_sep
=columns
[len
];
129 ExtColSep
[col
]=col_sep
;
131 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
134 if (strncasecmp(columns
,"c-ip",len
)==0 && ExtCols
[EXTCOL_Ip
]<0) col_id
=EXTCOL_Ip
;
135 else if (strncasecmp(columns
,"date",len
)==0) col_id
=EXTCOL_Date
;
136 else if (strncasecmp(columns
,"time",len
)==0) col_id
=EXTCOL_Time
;
138 if (strncasecmp(columns
,"c-dns",len
)==0) col_id
=EXTCOL_Ip
;
140 if (strncasecmp(columns
,"cs-uri",len
)==0) col_id
=EXTCOL_Uri
;
142 if (strncasecmp(columns
,"sc-bytes",len
)==0) col_id
=EXTCOL_Bytes
;
144 if (strncasecmp(columns
,"sc-status",len
)==0) col_id
=EXTCOL_Status
;
145 } else if (len
==10) {
146 if (strncasecmp(columns
,"time-taken",len
)==0) col_id
=EXTCOL_TimeTaken
;
147 } else if (len
==11) {
148 if (strncasecmp(columns
,"cs-username",len
)==0) col_id
=EXTCOL_UserName
;
150 if (col_id
!=EXTCOL_Last
) {
156 while (*columns
&& (unsigned char)*columns
<=' ') {
157 if (*columns
!=col_sep
) {
158 debuga(__FILE__
,__LINE__
,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
169 Decode a directive field from the \a Line.
171 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
172 if the line is not a known directive.
174 static enum ReadLogReturnCodeEnum
ExtLog_Directive(const char *Line
)
177 if (strncasecmp(Line
,"Version:",8)==0) return(RLRC_Ignore
);
178 if (strncasecmp(Line
,"Software:",9)==0) return(RLRC_Ignore
);
179 if (strncasecmp(Line
,"Start-Date:",11)==0) return(RLRC_Ignore
);
180 if (strncasecmp(Line
,"End-Date:",9)==0) return(RLRC_Ignore
);
181 if (strncasecmp(Line
,"Date:",5)==0) return(RLRC_Ignore
);
182 if (strncasecmp(Line
,"Remark:",7)==0) return(RLRC_Ignore
);
183 if (strncasecmp(Line
,"Fields:",7)==0) {
185 while (*Line
==' ' || *Line
=='\t') Line
++;
186 if (!ExtLog_Fields(Line
)) return(RLRC_Unknown
);
189 return(RLRC_Unknown
);
193 Get the type of the column \a col_num.
195 \return The type of the column or EXTCOL_Last if
196 the column must be ignored.
198 static enum ext_col_id
ExtLog_WhichColumn(int col_num
)
202 for (i
=0 ; i
<EXTCOL_Last
&& ExtCols
[i
]!=col_num
; i
++);
207 Scan through the string of a column.
209 \param Line The pointer to the beginning of the string.
210 \param col The column number.
212 static char *ExtLog_GetString(char *Line
,int col
,char **End
)
217 //skip opening double quote
226 if (End
) *End
=(dequote
) ? NULL
: Line
;
227 Line
++;//skip the closing quote
234 if (*Line
==ExtColSep
[col
]) {
241 if (quote
) return(NULL
);//missing closing quote.
246 Scan through the date in a column.
248 \param Line The pointer to the beginning of the string.
250 static char *ExtLog_GetDate(char *Line
,struct tm
*Date
)
258 //skip opening double quote
261 if (sscanf(Line
,"%d-%d-%d%n",&year
,&month
,&day
,&next
)!=3) return(NULL
);
264 if (*Line
!='\"') return(NULL
);//missing closing quote.
267 Date
->tm_year
=year
-1900;
268 Date
->tm_mon
=month
-1;
274 Scan through the time in a column.
276 \param Line The pointer to the beginning of the string.
278 static char *ExtLog_GetTime(char *Line
,struct tm
*Date
)
286 //skip opening double quote
289 if (sscanf(Line
,"%d:%d:%d%n",&hour
,&minute
,&second
,&next
)!=3) return(NULL
);
292 if (*Line
!='\"') return(NULL
);//missing closing quote.
302 Scan through a number in a column.
304 \param Line The pointer to the beginning of the string.
305 \param Value A variable to store the number.
307 static char *ExtLog_GetLongInt(char *Line
,long int *Value
)
311 //skip opening double quote
315 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
317 if (*Line
!='\"') return(NULL
);//missing closing quote.
324 Scan through a number in a column.
326 \param Line The pointer to the beginning of the string.
327 \param Value A variable to store the number.
329 static char *ExtLog_GetLongLongInt(char *Line
,long long int *Value
)
333 //skip opening double quote
337 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
339 if (*Line
!='\"') return(NULL
);//missing closing quote.
346 Remove the quotes inside the \a string. If no quotes are known to
347 be in the string, the \a end_ptr is the pointer to the last
348 character of the string.
350 static void ExtLog_FixString(char *string
,char *end_ptr
)
354 if (!string
) return;//string not parsed
355 if (end_ptr
) { //end is known and no quotes are in the string
359 // remove the quotes and end at the first unremoveable quote
364 if (string
[1]!='\"') break; //closing quote
365 string
++;//skip the first quote
373 Read one entry from an extended log.
375 \param Line One line from the input log file.
376 \param Entry Where to store the information parsed from the line.
378 \retval RLRC_NoError One valid entry is parsed.
379 \retval RLRC_Unknown The line is invalid.
380 \retval RLRC_InternalError An internal error was encountered.
382 static enum ReadLogReturnCodeEnum
ExtLog_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
385 enum ext_col_id col_id
;
395 enum ReadLogReturnCodeEnum status
=ExtLog_Directive(Line
);
396 if (status
!=RLRC_Unknown
) InExtLog
=true;
399 if (!InExtLog
) return(RLRC_Unknown
);
403 if (col
>=ExtColNumber
) {
404 debuga(__FILE__
,__LINE__
,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col
,ExtColNumber
);
405 return(RLRC_Unknown
);
407 col_id
=ExtLog_WhichColumn(col
);
412 Line
=ExtLog_GetString(Line
,col
,&IpEnd
);
413 if (!Line
) return(RLRC_Unknown
);
415 case EXTCOL_UserName
:
416 Entry
->User
=User
=Line
;
417 Line
=ExtLog_GetString(Line
,col
,&UserEnd
);
418 if (!Line
) return(RLRC_Unknown
);
421 Line
=ExtLog_GetDate(Line
,&Entry
->EntryTime
);
422 if (!Line
) return(RLRC_Unknown
);
425 Line
=ExtLog_GetTime(Line
,&Entry
->EntryTime
);
426 if (!Line
) return(RLRC_Unknown
);
428 case EXTCOL_TimeTaken
:
429 Line
=ExtLog_GetLongInt(Line
,&Entry
->ElapsedTime
);
430 if (!Line
) return(RLRC_Unknown
);
433 Line
=ExtLog_GetLongLongInt(Line
,&Entry
->DataSize
);
434 if (!Line
) return(RLRC_Unknown
);
438 Line
=ExtLog_GetString(Line
,col
,&UrlEnd
);
439 if (!Line
) return(RLRC_Unknown
);
442 Entry
->HttpCode
=Line
;
443 Line
=ExtLog_GetString(Line
,col
,&HttpCodeEnd
);
444 if (!Line
) return(RLRC_Unknown
);
446 case EXTCOL_Last
://ignored column
447 Line
=ExtLog_GetString(Line
,col
,NULL
);
448 if (!Line
) return(RLRC_Unknown
);
451 if (*Line
&& *Line
!=ExtColSep
[col
]) return(RLRC_Unknown
);
452 while (*Line
&& *Line
==ExtColSep
[col
]) Line
++;
455 if (col
!=ExtColNumber
) {
456 debuga(__FILE__
,__LINE__
,_("Only %d columns in an extended log file format when %d have been announced\n"),col
,ExtColNumber
);
457 return(RLRC_Unknown
);
460 // check the entry time
461 if (mktime(&Entry
->EntryTime
)==-1) {
462 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the extended log file\n"));
463 return(RLRC_InternalError
);
466 ExtLog_FixString(Ip
,IpEnd
);
467 ExtLog_FixString(User
,UserEnd
);
468 ExtLog_FixString(Entry
->Url
,UrlEnd
);
469 ExtLog_FixString(Entry
->HttpCode
,HttpCodeEnd
);
471 return(RLRC_NoError
);
474 //! \brief Object to read an extended log.
475 const struct ReadLogProcessStruct ReadExtLog
=
477 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
478 N_("extended log format"),