2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 Maximum number of columns accepted in an extended log format.
34 The current value is an arbitrary number chosen to have an
37 #define MAX_EXT_COLUMNS 250
53 EXTCOL_Last
//last entry of the list !
56 //! \c True if the extended common log format is confirmed.
57 static bool InExtLog
=false;
58 //! The index of relevant columns in the log file.
59 static int ExtCols
[EXTCOL_Last
];
60 //! The character to use as a column separator.
61 static char ExtColSep
[MAX_EXT_COLUMNS
];
62 //! The number of columns according to the "fields" directive.
63 static int ExtColNumber
;
64 //! Temporary buffer to concatenate the url.
65 static char ExtTempUrl
[MAX_URL_LEN
];
68 A new file is being read. The name of the file is \a FileName.
70 static void ExtLog_NewFile(const char *FileName
)
77 Parse the "Fields" directive listing the columns in the log. The
78 \a columns is a pointer to the first column of the directive.
80 \return \c True if the fields directive is valid or false if it could not
83 static bool ExtLog_Fields(const char *columns
)
91 enum ext_col_id col_id
;
93 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
94 const char const *prefixes
[]=
106 for (i
=0 ; i
<EXTCOL_Last
; i
++) ExtCols
[i
]=-1;
110 if (col
>=MAX_EXT_COLUMNS
) {
111 debuga(__FILE__
,__LINE__
,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS
);
117 for (i
=sizeof(prefixes
)/sizeof(*prefixes
)-1 ; i
>=0 ; i
--) {
118 len
=strlen(prefixes
[i
]);
119 if (strncasecmp(columns
,prefixes
[i
],len
)==0) {
120 if (columns
[len
]=='-') {
123 } else if (columns
[len
]=='(') {
129 (void)prefix
;//compiler pacifier
131 for ( ; (unsigned char)columns
[len
]>' ' ; len
++) {//skip a word and accept any separator (tab or space)
132 if (header_start
>=0 && columns
[len
]==')') header_end
=len
;
134 (void)header_end
;//compiler pacifier
135 col_sep
=columns
[len
];
136 ExtColSep
[col
]=col_sep
;
138 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
141 if (strncasecmp(columns
,"c-ip",len
)==0 && ExtCols
[EXTCOL_Ip
]<0) col_id
=EXTCOL_Ip
;
142 else if (strncasecmp(columns
,"date",len
)==0) col_id
=EXTCOL_Date
;
143 else if (strncasecmp(columns
,"time",len
)==0) col_id
=EXTCOL_Time
;
145 if (strncasecmp(columns
,"c-dns",len
)==0) col_id
=EXTCOL_Ip
;
147 if (strncasecmp(columns
,"cs-uri",len
)==0) col_id
=EXTCOL_Uri
;
149 if (strncasecmp(columns
,"cs-host",len
)==0) col_id
=EXTCOL_Host
;
151 if (strncasecmp(columns
,"sc-bytes",len
)==0) col_id
=EXTCOL_Bytes
;
153 if (strncasecmp(columns
,"sc-status",len
)==0) col_id
=EXTCOL_Status
;
154 } else if (len
==10) {
155 if (strncasecmp(columns
,"time-taken",len
)==0) col_id
=EXTCOL_TimeTaken
;
156 } else if (len
==11) {
157 if (strncasecmp(columns
,"cs-username",len
)==0) col_id
=EXTCOL_UserName
;
158 if (strncasecmp(columns
,"cs-uri-port",len
)==0) col_id
=EXTCOL_Port
;
159 if (strncasecmp(columns
,"cs-uri-path",len
)==0) col_id
=EXTCOL_Path
;
160 } else if (len
==12) {
161 if (strncasecmp(columns
,"cs-uri-query",len
)==0) col_id
=EXTCOL_Query
;
162 } else if (len
==13) {
163 if (strncasecmp(columns
,"cs-uri-scheme",len
)==0) col_id
=EXTCOL_Scheme
;
165 if (col_id
!=EXTCOL_Last
) {
171 while (*columns
&& (unsigned char)*columns
<=' ') {
172 if (*columns
!=col_sep
) {
173 debuga(__FILE__
,__LINE__
,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
184 Decode a directive field from the \a Line.
186 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
187 if the line is not a known directive.
189 static enum ReadLogReturnCodeEnum
ExtLog_Directive(const char *Line
)
192 if (strncasecmp(Line
,"Version:",8)==0) return(RLRC_Ignore
);
193 if (strncasecmp(Line
,"Software:",9)==0) return(RLRC_Ignore
);
194 if (strncasecmp(Line
,"Start-Date:",11)==0) return(RLRC_Ignore
);
195 if (strncasecmp(Line
,"End-Date:",9)==0) return(RLRC_Ignore
);
196 if (strncasecmp(Line
,"Date:",5)==0) return(RLRC_Ignore
);
197 if (strncasecmp(Line
,"Remark:",7)==0) return(RLRC_Ignore
);
198 if (strncasecmp(Line
,"Fields:",7)==0) {
200 while (*Line
==' ' || *Line
=='\t') Line
++;
201 if (!ExtLog_Fields(Line
)) return(RLRC_Unknown
);
204 return(RLRC_Unknown
);
208 Get the type of the column \a col_num.
210 \return The type of the column or EXTCOL_Last if
211 the column must be ignored.
213 static enum ext_col_id
ExtLog_WhichColumn(int col_num
)
217 for (i
=0 ; i
<EXTCOL_Last
&& ExtCols
[i
]!=col_num
; i
++);
222 Scan through the string of a column.
224 \param Line The pointer to the beginning of the string.
225 \param col The column number.
227 static char *ExtLog_GetString(char *Line
,int col
,char **End
)
232 //skip opening double quote
241 if (End
) *End
=(dequote
) ? NULL
: Line
;
242 Line
++;//skip the closing quote
249 if (*Line
==ExtColSep
[col
]) {
256 if (quote
) return(NULL
);//missing closing quote.
261 Scan through the date in a column.
263 \param Line The pointer to the beginning of the string.
265 static char *ExtLog_GetDate(char *Line
,struct tm
*Date
)
273 //skip opening double quote
276 if (sscanf(Line
,"%d-%d-%d%n",&year
,&month
,&day
,&next
)!=3) return(NULL
);
279 if (*Line
!='\"') return(NULL
);//missing closing quote.
282 Date
->tm_year
=year
-1900;
283 Date
->tm_mon
=month
-1;
289 Scan through the time in a column.
291 \param Line The pointer to the beginning of the string.
293 static char *ExtLog_GetTime(char *Line
,struct tm
*Date
)
301 //skip opening double quote
304 if (sscanf(Line
,"%d:%d:%d%n",&hour
,&minute
,&second
,&next
)!=3) return(NULL
);
307 if (*Line
!='\"') return(NULL
);//missing closing quote.
317 Scan through a number in a column.
319 \param Line The pointer to the beginning of the string.
320 \param Value A variable to store the number.
322 static char *ExtLog_GetLongInt(char *Line
,long int *Value
)
326 //skip opening double quote
330 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
332 if (*Line
!='\"') return(NULL
);//missing closing quote.
339 Scan through a number in a column.
341 \param Line The pointer to the beginning of the string.
342 \param Value A variable to store the number.
344 static char *ExtLog_GetLongLongInt(char *Line
,long long int *Value
)
348 //skip opening double quote
352 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
354 if (*Line
!='\"') return(NULL
);//missing closing quote.
361 Remove the quotes inside the \a string. If no quotes are known to
362 be in the string, the \a end_ptr is the pointer to the last
363 character of the string.
365 static void ExtLog_FixString(char *string
,char *end_ptr
)
369 if (!string
) return;//string not parsed
370 if (end_ptr
) { //end is known and no quotes are in the string
374 // remove the quotes and end at the first unremoveable quote
379 if (string
[1]!='\"') break; //closing quote
380 string
++;//skip the first quote
388 * Discard an empty string.
390 * An empty string may contain a single dash.
392 * \param String The string to check.
394 * \return The string pointer if it isn't empty or NULL if the string
397 static const char *ExtLog_FixEmptyString(const char *String
)
399 if (String
&& (String
[0]=='\0' || (String
[0]=='-' && String
[1]=='\0'))) String
=NULL
;
404 * Create the URL from the split elements.
406 static char *ExtLog_ConcatUrl(const char *Scheme
,const char *Host
,const char *Port
,const char *Path
,const char *Query
)
411 Host
=ExtLog_FixEmptyString(Host
);
412 if (!Host
) return(NULL
);
413 Scheme
=ExtLog_FixEmptyString(Scheme
);
414 Port
=ExtLog_FixEmptyString(Port
);
415 Path
=ExtLog_FixEmptyString(Path
);
416 Query
=ExtLog_FixEmptyString(Query
);
421 if (tlen
+len
+3>=sizeof(ExtTempUrl
))
423 debuga(__FILE__
,__LINE__
,_("URI scheme too long in log file\n"));
426 strcpy(ExtTempUrl
,Scheme
);
427 strcpy(ExtTempUrl
+len
,"://");
432 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
433 strncpy(ExtTempUrl
+tlen
,Host
,len
);
435 ExtTempUrl
[tlen
]='\0';
437 if (tlen
+2<sizeof(ExtTempUrl
) && Port
)
440 if (tlen
+len
+1>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-2;
441 ExtTempUrl
[tlen
++]=':';
442 strncpy(ExtTempUrl
+tlen
,Port
,len
);
444 ExtTempUrl
[tlen
]='\0';
447 if (tlen
<sizeof(ExtTempUrl
) && Path
)
450 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
451 strncpy(ExtTempUrl
+tlen
,Path
,len
);
453 ExtTempUrl
[tlen
]='\0';
456 if (tlen
<sizeof(ExtTempUrl
) && Query
)
459 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
460 strncpy(ExtTempUrl
+tlen
,Query
,len
);
462 ExtTempUrl
[tlen
]='\0';
468 Read one entry from an extended log.
470 \param Line One line from the input log file.
471 \param Entry Where to store the information parsed from the line.
473 \retval RLRC_NoError One valid entry is parsed.
474 \retval RLRC_Unknown The line is invalid.
475 \retval RLRC_InternalError An internal error was encountered.
477 static enum ReadLogReturnCodeEnum
ExtLog_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
480 enum ext_col_id col_id
;
486 char *UrlScheme
=NULL
,*UrlSchemeEnd
;
487 char *UrlHost
=NULL
,*UrlHostEnd
;
488 char *UrlPort
=NULL
,*UrlPortEnd
;
489 char *UrlPath
=NULL
,*UrlPathEnd
;
490 char *UrlQuery
=NULL
,*UrlQueryEnd
;
494 enum ReadLogReturnCodeEnum status
=ExtLog_Directive(Line
);
495 if (status
!=RLRC_Unknown
) InExtLog
=true;
498 if (!InExtLog
) return(RLRC_Unknown
);
502 if (col
>=ExtColNumber
) {
503 debuga(__FILE__
,__LINE__
,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col
,ExtColNumber
);
504 return(RLRC_Unknown
);
506 col_id
=ExtLog_WhichColumn(col
);
511 Line
=ExtLog_GetString(Line
,col
,&IpEnd
);
512 if (!Line
) return(RLRC_Unknown
);
514 case EXTCOL_UserName
:
516 Line
=ExtLog_GetString(Line
,col
,&UserEnd
);
517 if (!Line
) return(RLRC_Unknown
);
520 Line
=ExtLog_GetDate(Line
,&Entry
->EntryTime
);
521 if (!Line
) return(RLRC_Unknown
);
524 Line
=ExtLog_GetTime(Line
,&Entry
->EntryTime
);
525 if (!Line
) return(RLRC_Unknown
);
527 case EXTCOL_TimeTaken
:
528 Line
=ExtLog_GetLongInt(Line
,&Entry
->ElapsedTime
);
529 if (!Line
) return(RLRC_Unknown
);
532 Line
=ExtLog_GetLongLongInt(Line
,&Entry
->DataSize
);
533 if (!Line
) return(RLRC_Unknown
);
537 Line
=ExtLog_GetString(Line
,col
,&UrlEnd
);
538 if (!Line
) return(RLRC_Unknown
);
542 Line
=ExtLog_GetString(Line
,col
,&UrlSchemeEnd
);
543 if (!Line
) return(RLRC_Unknown
);
547 Line
=ExtLog_GetString(Line
,col
,&UrlHostEnd
);
548 if (!Line
) return(RLRC_Unknown
);
552 Line
=ExtLog_GetString(Line
,col
,&UrlPortEnd
);
553 if (!Line
) return(RLRC_Unknown
);
557 Line
=ExtLog_GetString(Line
,col
,&UrlPathEnd
);
558 if (!Line
) return(RLRC_Unknown
);
562 Line
=ExtLog_GetString(Line
,col
,&UrlQueryEnd
);
563 if (!Line
) return(RLRC_Unknown
);
566 Entry
->HttpCode
=Line
;
567 Line
=ExtLog_GetString(Line
,col
,&HttpCodeEnd
);
568 if (!Line
) return(RLRC_Unknown
);
570 case EXTCOL_Last
://ignored column
571 Line
=ExtLog_GetString(Line
,col
,NULL
);
572 if (!Line
) return(RLRC_Unknown
);
575 if (*Line
&& *Line
!=ExtColSep
[col
]) return(RLRC_Unknown
);
576 while (*Line
&& *Line
==ExtColSep
[col
]) Line
++;
579 if (col
!=ExtColNumber
) {
580 debuga(__FILE__
,__LINE__
,_("Only %d columns in an extended log file format when %d have been announced\n"),col
,ExtColNumber
);
581 return(RLRC_Unknown
);
584 // check the entry time
585 if (mktime(&Entry
->EntryTime
)==-1) {
586 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the extended log file\n"));
587 return(RLRC_InternalError
);
590 ExtLog_FixString(Ip
,IpEnd
);
591 ExtLog_FixString(Entry
->User
,UserEnd
);
592 ExtLog_FixString(Entry
->Url
,UrlEnd
);
593 ExtLog_FixString(Entry
->HttpCode
,HttpCodeEnd
);
596 ExtLog_FixString(UrlScheme
,UrlSchemeEnd
);
597 ExtLog_FixString(UrlHost
,UrlHostEnd
);
598 ExtLog_FixString(UrlPort
,UrlPortEnd
);
599 ExtLog_FixString(UrlPath
,UrlPathEnd
);
600 ExtLog_FixString(UrlQuery
,UrlQueryEnd
);
601 Entry
->Url
=ExtLog_ConcatUrl(UrlScheme
,UrlHost
,UrlPort
,UrlPath
,UrlQuery
);
604 return(RLRC_NoError
);
607 //! \brief Object to read an extended log.
608 const struct ReadLogProcessStruct ReadExtLog
=
610 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
611 N_("extended log format"),