2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 Maximum number of columns accepted in an extended log format.
34 The current value is an arbitrary number chosen to have an
37 #define MAX_EXT_COLUMNS 250
48 EXTCOL_Last
//last entry of the list !
51 //! \c True once a line has been recognised as a directive of the extended log format.
52 static bool InExtLog
=false;
53 //! For each ::ext_col_id, the index of the matching column in the log file or -1 if that column was not found.
54 static int ExtCols
[EXTCOL_Last
];
55 //! The separator character found after each column of the "fields" directive, indexed by column number.
56 static char ExtColSep
[MAX_EXT_COLUMNS
];
57 //! The number of columns according to the "fields" directive.
58 static int ExtColNumber
;
61 A new file is being read. The name of the file is \a FileName.
63 static void ExtLog_NewFile(const char *FileName
)
70 Parse the "Fields" directive listing the columns in the log. The
71 \a columns is a pointer to the first column of the directive.
73 \return \c True if the fields are valid or \c false if they could not
// Walks through the column names of a "fields" directive, stores the separator
// following each column in ExtColSep[] and selects a column type (col_id) for
// the names that are recognised.
// NOTE(review): several statements of this function are not visible here; the
// comments below only describe the statements that are.
76 static bool ExtLog_Fields(const char *columns
)
84 enum ext_col_id col_id
;
86 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
// NOTE(review): "const char const *" repeats the qualifier of the pointed-to
// characters; "const char * const" was presumably intended -- confirm.
87 const char const *prefixes
[]=
// mark every known column type as not found yet
99 for (i
=0 ; i
<EXTCOL_Last
; i
++) ExtCols
[i
]=-1;
// refuse a directive with more columns than ExtColSep[] can hold
103 if (col
>=MAX_EXT_COLUMNS
) {
104 debuga(__FILE__
,__LINE__
,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS
);
// try every known prefix, from the last entry of the table down to the first
// NOTE(review): the i>=0 test can only become false if i has a signed type;
// its declaration is not visible here -- confirm.
110 for (i
=sizeof(prefixes
)/sizeof(*prefixes
)-1 ; i
>=0 ; i
--) {
111 len
=strlen(prefixes
[i
]);
// the prefix is compared without regard to case
112 if (strncasecmp(columns
,prefixes
[i
],len
)==0) {
// a matching prefix is followed either by '-' or by '('
113 if (columns
[len
]=='-') {
116 } else if (columns
[len
]=='(') {
122 (void)prefix
;//compiler pacifier
124 for ( ; (unsigned char)columns
[len
]>' ' ; len
++) {//skip a word and accept any separator (tab or space)
// remember where the closing parenthesis is when an opening one was seen
125 if (header_start
>=0 && columns
[len
]==')') header_end
=len
;
127 (void)header_end
;//compiler pacifier
// the character that ended the word is recorded as the separator of this column
128 col_sep
=columns
[len
];
129 ExtColSep
[col
]=col_sep
;
131 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
// c-ip is only selected while no column has been assigned to EXTCOL_Ip
134 if (strncasecmp(columns
,"c-ip",len
)==0 && ExtCols
[EXTCOL_Ip
]<0) col_id
=EXTCOL_Ip
;
135 else if (strncasecmp(columns
,"date",len
)==0) col_id
=EXTCOL_Date
;
136 else if (strncasecmp(columns
,"time",len
)==0) col_id
=EXTCOL_Time
;
// unlike c-ip, c-dns is selected for EXTCOL_Ip without checking ExtCols[EXTCOL_Ip]
138 if (strncasecmp(columns
,"c-dns",len
)==0) col_id
=EXTCOL_Ip
;
140 if (strncasecmp(columns
,"cs-uri",len
)==0) col_id
=EXTCOL_Uri
;
142 if (strncasecmp(columns
,"sc-bytes",len
)==0) col_id
=EXTCOL_Bytes
;
144 if (strncasecmp(columns
,"sc-status",len
)==0) col_id
=EXTCOL_Status
;
145 } else if (len
==10) {
146 if (strncasecmp(columns
,"time-taken",len
)==0) col_id
=EXTCOL_TimeTaken
;
147 } else if (len
==11) {
148 if (strncasecmp(columns
,"cs-username",len
)==0) col_id
=EXTCOL_UserName
;
// only a column whose name was recognised above is processed here
150 if (col_id
!=EXTCOL_Last
) {
// skip the blanks before the next column; each of them must be the separator
// character found after the current column
156 while (*columns
&& (unsigned char)*columns
<=' ') {
157 if (*columns
!=col_sep
) {
158 debuga(__FILE__
,__LINE__
,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
169 Decode a directive field from the \a Line.
171 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
172 if the line is not a known directive.
// Compares the beginning of Line, without regard to case, with the known
// directive names. Only the "Fields:" directive is parsed further.
174 static enum ReadLogReturnCodeEnum
ExtLog_Directive(const char *Line
)
// directives whose content is not used: the line is reported as ignored
177 if (strncasecmp(Line
,"Version:",8)==0) return(RLRC_Ignore
);
178 if (strncasecmp(Line
,"Software:",9)==0) return(RLRC_Ignore
);
179 if (strncasecmp(Line
,"Start-Date:",11)==0) return(RLRC_Ignore
);
180 if (strncasecmp(Line
,"End-Date:",9)==0) return(RLRC_Ignore
);
181 if (strncasecmp(Line
,"Date:",5)==0) return(RLRC_Ignore
);
182 if (strncasecmp(Line
,"Remark:",7)==0) return(RLRC_Ignore
);
// the "Fields:" directive lists the columns and is handed to ExtLog_Fields()
183 if (strncasecmp(Line
,"Fields:",7)==0) {
// skip the spaces and tabs before the first column name
185 while (*Line
==' ' || *Line
=='\t') Line
++;
// a column list that cannot be parsed makes the line an unknown one
186 if (!ExtLog_Fields(Line
)) return(RLRC_Unknown
);
// none of the directive names above matched
189 return(RLRC_Unknown
);
193 Get the type of the column \a col_num.
195 \return The type of the column or EXTCOL_Last if
196 the column must be ignored.
// Searches ExtCols[] for the entry holding col_num.
198 static enum ext_col_id
ExtLog_WhichColumn(int col_num
)
// the loop has no body on purpose: it stops on the first entry equal to
// col_num, or with i equal to EXTCOL_Last when no entry matches
202 for (i
=0 ; i
<EXTCOL_Last
&& ExtCols
[i
]!=col_num
; i
++);
207 Scan through the string of a column.
209 \param Line The pointer to the beginning of the string.
210 \param col The column number.
// Scans the string stored in column number col of Line.
// When End is not NULL, *End is set to NULL if dequote is set and to the
// current position in Line otherwise (see the assignment below).
// NULL is returned when a closing quote is missing.
212 static char *ExtLog_GetString(char *Line
,int col
,char **End
)
217 //skip opening double quote
226 if (End
) *End
=(dequote
) ? NULL
: Line
;
227 Line
++;//skip the closing quote
// the separator recorded for this column by the "fields" directive is looked for
234 if (*Line
==ExtColSep
[col
]) {
241 if (quote
) return(NULL
);//missing closing quote.
246 Scan through the date in a column.
248 \param Line The pointer to the beginning of the string.
// Reads a date made of three integers separated by '-' and stores it in Date.
// NULL is returned if the three numbers cannot be read or if a closing quote
// is missing.
250 static char *ExtLog_GetDate(char *Line
,struct tm
*Date
)
258 //skip opening double quote
// %n stores in next the number of characters consumed by sscanf; it is not
// counted in the value returned by sscanf, hence the comparison with 3
261 if (sscanf(Line
,"%d-%d-%d%n",&year
,&month
,&day
,&next
)!=3) return(NULL
);
264 if (*Line
!='\"') return(NULL
);//missing closing quote.
// struct tm counts the years from 1900 and the months from 0
267 Date
->tm_year
=year
-1900;
268 Date
->tm_mon
=month
-1;
274 Scan through the time in a column.
276 \param Line The pointer to the beginning of the string.
// Reads a time made of three integers separated by ':'.
// NULL is returned if the three numbers cannot be read or if a closing quote
// is missing.
278 static char *ExtLog_GetTime(char *Line
,struct tm
*Date
)
286 //skip opening double quote
// %n stores in next the number of characters consumed by sscanf; it is not
// counted in the value returned by sscanf, hence the comparison with 3
289 if (sscanf(Line
,"%d:%d:%d%n",&hour
,&minute
,&second
,&next
)!=3) return(NULL
);
292 if (*Line
!='\"') return(NULL
);//missing closing quote.
302 Scan through a number in a column.
304 \param Line The pointer to the beginning of the string.
305 \param Value A variable to store the number.
// Reads a run of decimal digits from Line and accumulates it into *Value.
// NULL is returned when a closing quote is missing.
307 static char *ExtLog_GetLongInt(char *Line
,long int *Value
)
311 //skip opening double quote
// each digit is appended to the number accumulated so far
// NOTE(review): *Line is a plain char; isdigit() is only defined for values
// representable as unsigned char (or EOF), so a cast to unsigned char would be
// safer. The accumulation is not protected against overflow of *Value.
315 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
317 if (*Line
!='\"') return(NULL
);//missing closing quote.
324 Scan through a number in a column and store it as a long long int.
326 \param Line The pointer to the beginning of the string.
327 \param Value A variable to store the number.
// Reads a run of decimal digits from Line and accumulates it into *Value.
// NULL is returned when a closing quote is missing.
329 static char *ExtLog_GetLongLongInt(char *Line
,long long int *Value
)
333 //skip opening double quote
// each digit is appended to the number accumulated so far
// NOTE(review): *Line is a plain char; isdigit() is only defined for values
// representable as unsigned char (or EOF), so a cast to unsigned char would be
// safer. The accumulation is not protected against overflow of *Value.
337 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
339 if (*Line
!='\"') return(NULL
);//missing closing quote.
346 Remove the quotes inside the \a string. If no quotes are known to
347 be in the string, the \a end_ptr is the pointer to the last
348 character of the string.
// Post-processes a string collected from a column. A NULL string is left
// alone; a non-NULL end_ptr selects the branch for strings without quotes.
350 static void ExtLog_FixString(char *string
,char *end_ptr
)
354 if (!string
) return;//string not parsed
355 if (end_ptr
) { //end is known and no quotes are in the string
359 // remove the quotes and end at the first unremoveable quote
// a quote that is not immediately followed by a second quote stops the loop
364 if (string
[1]!='\"') break; //closing quote
365 string
++;//skip the first quote
373 Read one entry from an extended log.
375 \param Line One line from the input log file.
376 \param Entry Where to store the information parsed from the line.
378 \retval RLRC_NoError One valid entry is parsed.
379 \retval RLRC_Unknown The line is invalid.
380 \retval RLRC_InternalError An internal error was encountered.
// Parses one line of the input: a directive is handed to ExtLog_Directive(),
// any other line is split into columns that are decoded according to the type
// returned by ExtLog_WhichColumn().
// NOTE(review): the statements selecting between the branches below are not
// visible here; the comments only describe the visible statements.
382 static enum ReadLogReturnCodeEnum
ExtLog_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
385 enum ext_col_id col_id
;
// check whether the line is a directive
394 enum ReadLogReturnCodeEnum status
=ExtLog_Directive(Line
);
// any recognised directive confirms that the input is an extended log
395 if (status
!=RLRC_Unknown
) InExtLog
=true;
// nothing is decoded until the format has been confirmed
398 if (!InExtLog
) return(RLRC_Unknown
);
// reject a line with more columns than announced by the "fields" directive
402 if (col
>=ExtColNumber
) {
403 debuga(__FILE__
,__LINE__
,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col
,ExtColNumber
);
404 return(RLRC_Unknown
);
// find out which kind of data is stored in this column
406 col_id
=ExtLog_WhichColumn(col
);
// every parser below returns NULL on a malformed column and that rejects the line
411 Line
=ExtLog_GetString(Line
,col
,&IpEnd
);
412 if (!Line
) return(RLRC_Unknown
);
414 case EXTCOL_UserName
:
416 Line
=ExtLog_GetString(Line
,col
,&UserEnd
);
417 if (!Line
) return(RLRC_Unknown
);
420 Line
=ExtLog_GetDate(Line
,&Entry
->EntryTime
);
421 if (!Line
) return(RLRC_Unknown
);
424 Line
=ExtLog_GetTime(Line
,&Entry
->EntryTime
);
425 if (!Line
) return(RLRC_Unknown
);
427 case EXTCOL_TimeTaken
:
428 Line
=ExtLog_GetLongInt(Line
,&Entry
->ElapsedTime
);
429 if (!Line
) return(RLRC_Unknown
);
432 Line
=ExtLog_GetLongLongInt(Line
,&Entry
->DataSize
);
433 if (!Line
) return(RLRC_Unknown
);
437 Line
=ExtLog_GetString(Line
,col
,&UrlEnd
);
438 if (!Line
) return(RLRC_Unknown
);
// HttpCode points into Line, at the position where this column starts
441 Entry
->HttpCode
=Line
;
442 Line
=ExtLog_GetString(Line
,col
,&HttpCodeEnd
);
443 if (!Line
) return(RLRC_Unknown
);
445 case EXTCOL_Last
://ignored column
446 Line
=ExtLog_GetString(Line
,col
,NULL
);
447 if (!Line
) return(RLRC_Unknown
);
// after a column, only the end of the line or the separator of that column is accepted
450 if (*Line
&& *Line
!=ExtColSep
[col
]) return(RLRC_Unknown
);
// skip every consecutive occurrence of the separator
451 while (*Line
&& *Line
==ExtColSep
[col
]) Line
++;
// the line must contain exactly the announced number of columns
454 if (col
!=ExtColNumber
) {
455 debuga(__FILE__
,__LINE__
,_("Only %d columns in an extended log file format when %d have been announced\n"),col
,ExtColNumber
);
456 return(RLRC_Unknown
);
459 // check the entry time
// NOTE(review): the date and time come from the log content, yet a failure of
// mktime() is reported as RLRC_InternalError -- confirm this is intended.
460 if (mktime(&Entry
->EntryTime
)==-1) {
461 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the extended log file\n"));
462 return(RLRC_InternalError
);
// post-process the strings collected from the columns (see ExtLog_FixString)
465 ExtLog_FixString(Ip
,IpEnd
);
466 ExtLog_FixString(Entry
->User
,UserEnd
);
467 ExtLog_FixString(Entry
->Url
,UrlEnd
);
468 ExtLog_FixString(Entry
->HttpCode
,HttpCodeEnd
);
470 return(RLRC_NoError
);
473 //! \brief Object to read an extended log.
474 const struct ReadLogProcessStruct ReadExtLog
=
476 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
477 N_("extended log format"),