2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 Maximum number of columns accepted in an extended log format.
34 The current value is an arbitrary number chosen to have an
37 #define MAX_EXT_COLUMNS 250
53 EXTCOL_Last
//last entry of the list !
56 //! \c True if the extended common log format is confirmed.
57 static bool InExtLog
=false;
58 //! The index of relevant columns in the log file.
59 static int ExtCols
[EXTCOL_Last
];
60 //! The character to use as a column separator, stored for each column.
61 static char ExtColSep
[MAX_EXT_COLUMNS
];
62 //! The number of columns according to the "fields" directive.
63 static int ExtColNumber
;
64 //! Temporary buffer to concatenate the url.
65 static char ExtTempUrl
[MAX_URL_LEN
];
68 A new file is being read. The name of the file is \a FileName.
70 static void ExtLog_NewFile(const char *FileName
)
77 Parse the "Fields" directive listing the columns in the log. The
78 \a columns is a pointer to the first column of the directive.
80 \return \c True if the fields directive is valid or false if it could not
83 static bool ExtLog_Fields(const char *columns
)
91 enum ext_col_id col_id
;
93 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
94 const char const *prefixes
[]=
106 for (i
=0 ; i
<EXTCOL_Last
; i
++) ExtCols
[i
]=-1;
110 if (col
>=MAX_EXT_COLUMNS
) {
111 debuga(__FILE__
,__LINE__
,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS
);
117 for (i
=sizeof(prefixes
)/sizeof(*prefixes
)-1 ; i
>=0 ; i
--) {
118 len
=strlen(prefixes
[i
]);
119 if (strncasecmp(columns
,prefixes
[i
],len
)==0) {
120 if (columns
[len
]=='-') {
123 } else if (columns
[len
]=='(') {
129 (void)prefix
;//compiler pacifier
131 for ( ; (unsigned char)columns
[len
]>' ' ; len
++) {//skip a word and accept any separator (tab or space)
132 if (header_start
>=0 && columns
[len
]==')') header_end
=len
;
134 (void)header_end
;//compiler pacifier
135 col_sep
=columns
[len
];
136 ExtColSep
[col
]=col_sep
;
138 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
141 if (strncasecmp(columns
,"c-ip",len
)==0 && ExtCols
[EXTCOL_Ip
]<0) col_id
=EXTCOL_Ip
;
142 else if (strncasecmp(columns
,"date",len
)==0) col_id
=EXTCOL_Date
;
143 else if (strncasecmp(columns
,"time",len
)==0) col_id
=EXTCOL_Time
;
145 if (strncasecmp(columns
,"c-dns",len
)==0) col_id
=EXTCOL_Ip
;
147 if (strncasecmp(columns
,"cs-uri",len
)==0) col_id
=EXTCOL_Uri
;
149 if (strncasecmp(columns
,"cs-host",len
)==0) col_id
=EXTCOL_Host
;
151 if (strncasecmp(columns
,"sc-bytes",len
)==0) col_id
=EXTCOL_Bytes
;
153 if (strncasecmp(columns
,"sc-status",len
)==0) col_id
=EXTCOL_Status
;
154 } else if (len
==10) {
155 if (strncasecmp(columns
,"time-taken",len
)==0) col_id
=EXTCOL_TimeTaken
;
156 } else if (len
==11) {
157 if (strncasecmp(columns
,"cs-username",len
)==0) col_id
=EXTCOL_UserName
;
158 if (strncasecmp(columns
,"cs-uri-port",len
)==0) col_id
=EXTCOL_Port
;
159 if (strncasecmp(columns
,"cs-uri-path",len
)==0) col_id
=EXTCOL_Path
;
160 } else if (len
==12) {
161 if (strncasecmp(columns
,"cs-uri-query",len
)==0) col_id
=EXTCOL_Query
;
162 } else if (len
==13) {
163 if (strncasecmp(columns
,"cs-uri-scheme",len
)==0) col_id
=EXTCOL_Scheme
;
165 if (col_id
!=EXTCOL_Last
) {
171 while (*columns
&& (unsigned char)*columns
<=' ') {
172 if (*columns
!=col_sep
) {
173 debuga(__FILE__
,__LINE__
,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
184 Decode a directive field from the \a Line.
186 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
187 if the line is not a known directive.
189 static enum ReadLogReturnCodeEnum
ExtLog_Directive(const char *Line
)
192 if (strncasecmp(Line
,"Version:",8)==0) return(RLRC_Ignore
);
193 if (strncasecmp(Line
,"Software:",9)==0) return(RLRC_Ignore
);
194 if (strncasecmp(Line
,"Start-Date:",11)==0) return(RLRC_Ignore
);
195 if (strncasecmp(Line
,"End-Date:",9)==0) return(RLRC_Ignore
);
196 if (strncasecmp(Line
,"Date:",5)==0) return(RLRC_Ignore
);
197 if (strncasecmp(Line
,"Remark:",7)==0) return(RLRC_Ignore
);
198 if (strncasecmp(Line
,"Fields:",7)==0) {
200 while (*Line
==' ' || *Line
=='\t') Line
++;
201 if (!ExtLog_Fields(Line
)) return(RLRC_Unknown
);
204 return(RLRC_Unknown
);
208 Get the type of the column \a col_num.
210 \return The type of the column or EXTCOL_Last if
211 the column must be ignored.
213 static enum ext_col_id
ExtLog_WhichColumn(int col_num
)
217 for (i
=0 ; i
<EXTCOL_Last
&& ExtCols
[i
]!=col_num
; i
++);
222 Scan through the string of a column.
224 \param Line The pointer to the beginning of the string.
225 \param col The column number.
227 static char *ExtLog_GetString(char *Line
,int col
,char **End
)
232 //skip opening double quote
241 if (End
) *End
=(dequote
) ? NULL
: Line
;
242 Line
++;//skip the closing quote
249 if (*Line
==ExtColSep
[col
]) {
256 if (quote
) return(NULL
);//missing closing quote.
261 Scan through the date in a column.
263 \param Line The pointer to the beginning of the string.
265 static char *ExtLog_GetDate(char *Line
,struct tm
*Date
)
273 //skip opening double quote
276 if (sscanf(Line
,"%d-%d-%d%n",&year
,&month
,&day
,&next
)!=3) return(NULL
);
279 if (*Line
!='\"') return(NULL
);//missing closing quote.
282 Date
->tm_year
=year
-1900;
283 Date
->tm_mon
=month
-1;
289 Scan through the time in a column.
291 \param Line The pointer to the beginning of the string.
293 static char *ExtLog_GetTime(char *Line
,struct tm
*Date
)
301 //skip opening double quote
304 if (sscanf(Line
,"%d:%d:%d%n",&hour
,&minute
,&second
,&next
)!=3) return(NULL
);
307 if (*Line
!='\"') return(NULL
);//missing closing quote.
317 Scan through a number in a column.
319 \param Line The pointer to the beginning of the string.
320 \param Value A variable to store the number.
322 static char *ExtLog_GetLongInt(char *Line
,long int *Value
)
326 //skip opening double quote
330 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
332 if (*Line
!='\"') return(NULL
);//missing closing quote.
339 Scan through a number in a column.
341 \param Line The pointer to the beginning of the string.
342 \param Value A variable to store the number.
344 static char *ExtLog_GetLongLongInt(char *Line
,long long int *Value
)
348 //skip opening double quote
352 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
354 if (*Line
!='\"') return(NULL
);//missing closing quote.
361 Remove the quotes inside the \a string. If no quotes are known to
362 be in the string, the \a end_ptr is the pointer to the last
363 character of the string.
365 static void ExtLog_FixString(char *string
,char *end_ptr
)
369 if (!string
) return;//string not parsed
370 if (end_ptr
) { //end is known and no quotes are in the string
374 // remove the quotes and end at the first unremoveable quote
379 if (string
[1]!='\"') break; //closing quote
380 string
++;//skip the first quote
388 * Discard an empty string.
390 * An empty string may contain a single dash.
392 * \param String The string to check.
394 * \return The string pointer if it isn't empty or NULL if the string
397 static const char *ExtLog_FixEmptyString(const char *String
)
399 if (String
&& (String
[0]=='\0' || (String
[0]=='-' && String
[1]=='\0'))) String
=NULL
;
404 * Create the URL from the split elements.
406 static char *ExtLog_ConcatUrl(const char *Scheme
,const char *Host
,const char *Port
,const char *Path
,const char *Query
)
411 Scheme
=ExtLog_FixEmptyString(Scheme
);
412 Host
=ExtLog_FixEmptyString(Host
);
413 if (!Scheme
&& !Host
)
416 * Example of such an entry:
419 * date time time-taken c-ip sc-status s-action sc-bytes cs-bytes cs-method cs-uri-scheme cs-host cs-uri-port cs-uri-path cs-uri-query cs-username cs-auth-group s-hierarchy s-supplier-name rs(Content-Type) cs(Referer) cs(User-Agent) sc-filter-result cs-categories x-virus-id s-ip
420 * 2015-07-29 06:05:50 30 192.168.1.21 400 TCP_NC_MISS 903 8163 unknown - - 0 / - userid - - 10.81.0.26 - - - DENIED "unavailable" - 10.81.0.26 - - ICAP_NOT_SCANNED - - -
422 * It looks like a failed connection attempt to an unavailable resource. Let's assume it is safe to ignore it.
427 Port
=ExtLog_FixEmptyString(Port
);
428 Path
=ExtLog_FixEmptyString(Path
);
429 Query
=ExtLog_FixEmptyString(Query
);
434 if (tlen
+len
+3>=sizeof(ExtTempUrl
))
436 debuga(__FILE__
,__LINE__
,_("URI scheme too long in log file\n"));
439 strcpy(ExtTempUrl
,Scheme
);
440 strcpy(ExtTempUrl
+len
,"://");
447 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
448 strncpy(ExtTempUrl
+tlen
,Host
,len
);
452 if (tlen
+2<sizeof(ExtTempUrl
) && Port
)
455 if (tlen
+len
+1>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-2;
456 ExtTempUrl
[tlen
++]=':';
457 strncpy(ExtTempUrl
+tlen
,Port
,len
);
461 if (tlen
<sizeof(ExtTempUrl
) && Path
)
464 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
465 strncpy(ExtTempUrl
+tlen
,Path
,len
);
469 if (tlen
<sizeof(ExtTempUrl
) && Query
)
472 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
473 strncpy(ExtTempUrl
+tlen
,Query
,len
);
476 ExtTempUrl
[tlen
]='\0';
481 Read one entry from an extended log.
483 \param Line One line from the input log file.
484 \param Entry Where to store the information parsed from the line.
486 \retval RLRC_NoError One valid entry is parsed.
487 \retval RLRC_Unknown The line is invalid.
488 \retval RLRC_InternalError An internal error was encountered.
490 static enum ReadLogReturnCodeEnum
ExtLog_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
493 enum ext_col_id col_id
;
500 char *UrlScheme
=NULL
,*UrlSchemeEnd
;
501 char *UrlHost
=NULL
,*UrlHostEnd
;
502 char *UrlPort
=NULL
,*UrlPortEnd
;
503 char *UrlPath
=NULL
,*UrlPathEnd
;
504 char *UrlQuery
=NULL
,*UrlQueryEnd
;
508 enum ReadLogReturnCodeEnum status
=ExtLog_Directive(Line
);
509 if (status
!=RLRC_Unknown
) InExtLog
=true;
512 if (!InExtLog
) return(RLRC_Unknown
);
516 if (col
>=ExtColNumber
) {
517 debuga(__FILE__
,__LINE__
,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col
,ExtColNumber
);
518 return(RLRC_Unknown
);
520 col_id
=ExtLog_WhichColumn(col
);
525 Line
=ExtLog_GetString(Line
,col
,&IpEnd
);
526 if (!Line
) return(RLRC_Unknown
);
528 case EXTCOL_UserName
:
529 Entry
->User
=User
=Line
;
530 Line
=ExtLog_GetString(Line
,col
,&UserEnd
);
531 if (!Line
) return(RLRC_Unknown
);
534 Line
=ExtLog_GetDate(Line
,&Entry
->EntryTime
);
535 if (!Line
) return(RLRC_Unknown
);
538 Line
=ExtLog_GetTime(Line
,&Entry
->EntryTime
);
539 if (!Line
) return(RLRC_Unknown
);
541 case EXTCOL_TimeTaken
:
542 Line
=ExtLog_GetLongInt(Line
,&Entry
->ElapsedTime
);
543 if (!Line
) return(RLRC_Unknown
);
546 Line
=ExtLog_GetLongLongInt(Line
,&Entry
->DataSize
);
547 if (!Line
) return(RLRC_Unknown
);
551 Line
=ExtLog_GetString(Line
,col
,&UrlEnd
);
552 if (!Line
) return(RLRC_Unknown
);
556 Line
=ExtLog_GetString(Line
,col
,&UrlSchemeEnd
);
557 if (!Line
) return(RLRC_Unknown
);
561 Line
=ExtLog_GetString(Line
,col
,&UrlHostEnd
);
562 if (!Line
) return(RLRC_Unknown
);
566 Line
=ExtLog_GetString(Line
,col
,&UrlPortEnd
);
567 if (!Line
) return(RLRC_Unknown
);
571 Line
=ExtLog_GetString(Line
,col
,&UrlPathEnd
);
572 if (!Line
) return(RLRC_Unknown
);
576 Line
=ExtLog_GetString(Line
,col
,&UrlQueryEnd
);
577 if (!Line
) return(RLRC_Unknown
);
580 Entry
->HttpCode
=Line
;
581 Line
=ExtLog_GetString(Line
,col
,&HttpCodeEnd
);
582 if (!Line
) return(RLRC_Unknown
);
584 case EXTCOL_Last
://ignored column
585 Line
=ExtLog_GetString(Line
,col
,NULL
);
586 if (!Line
) return(RLRC_Unknown
);
589 if (*Line
&& *Line
!=ExtColSep
[col
]) return(RLRC_Unknown
);
590 while (*Line
&& *Line
==ExtColSep
[col
]) Line
++;
593 if (col
!=ExtColNumber
) {
594 debuga(__FILE__
,__LINE__
,_("Only %d columns in an extended log file format when %d have been announced\n"),col
,ExtColNumber
);
595 return(RLRC_Unknown
);
598 // check the entry time
599 if (mktime(&Entry
->EntryTime
)==-1) {
600 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the extended log file\n"));
601 return(RLRC_InternalError
);
604 ExtLog_FixString(Ip
,IpEnd
);
605 ExtLog_FixString(User
,UserEnd
);
606 ExtLog_FixString(Entry
->Url
,UrlEnd
);
607 ExtLog_FixString(Entry
->HttpCode
,HttpCodeEnd
);
610 ExtLog_FixString(UrlScheme
,UrlSchemeEnd
);
611 ExtLog_FixString(UrlHost
,UrlHostEnd
);
612 ExtLog_FixString(UrlPort
,UrlPortEnd
);
613 ExtLog_FixString(UrlPath
,UrlPathEnd
);
614 ExtLog_FixString(UrlQuery
,UrlQueryEnd
);
615 Entry
->Url
=ExtLog_ConcatUrl(UrlScheme
,UrlHost
,UrlPort
,UrlPath
,UrlQuery
);
618 return(RLRC_NoError
);
621 //! \brief Object to read an extended log.
622 const struct ReadLogProcessStruct ReadExtLog
=
624 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
625 N_("extended log format"),