2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
32 Maximum number of columns accepted in an extended log format.
34 The current value is an arbitrary number chosen to have an
37 #define MAX_EXT_COLUMNS 250
54 EXTCOL_Last
//last entry of the list !
57 //! \c True if the extended common log format is confirmed.
58 static bool InExtLog
=false;
59 //! The index of relevant columns in the log file.
60 static int ExtCols
[EXTCOL_Last
];
61 //! The separator character expected after each column, indexed by column number.
62 static char ExtColSep
[MAX_EXT_COLUMNS
];
63 //! The number of columns according to the "fields" directive.
64 static int ExtColNumber
;
65 //! Temporary buffer to concatenate the url.
66 static char ExtTempUrl
[MAX_URL_LEN
];
69 A new file is being read. The name of the file is \a FileName.
71 static void ExtLog_NewFile(const char *FileName
)
78 Parse the "Fields" directive listing the columns in the log. The
79 \a columns is a pointer to the first column of the directive.
81 \return \c True if the fields directive is valid or false if it could not
84 static bool ExtLog_Fields(const char *columns
)
92 enum ext_col_id col_id
;
94 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
95 const char const *prefixes
[]=
107 for (i
=0 ; i
<EXTCOL_Last
; i
++) ExtCols
[i
]=-1;
111 if (col
>=MAX_EXT_COLUMNS
) {
112 debuga(__FILE__
,__LINE__
,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS
);
118 for (i
=sizeof(prefixes
)/sizeof(*prefixes
)-1 ; i
>=0 ; i
--) {
119 len
=strlen(prefixes
[i
]);
120 if (strncasecmp(columns
,prefixes
[i
],len
)==0) {
121 if (columns
[len
]=='-') {
124 } else if (columns
[len
]=='(') {
130 (void)prefix
;//compiler pacifier
132 for ( ; (unsigned char)columns
[len
]>' ' ; len
++) {//skip a word and accept any separator (tab or space)
133 if (header_start
>=0 && columns
[len
]==')') header_end
=len
;
135 (void)header_end
;//compiler pacifier
136 col_sep
=columns
[len
];
137 ExtColSep
[col
]=col_sep
;
139 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
142 if (strncasecmp(columns
,"c-ip",len
)==0 && ExtCols
[EXTCOL_Ip
]<0) col_id
=EXTCOL_Ip
;
143 else if (strncasecmp(columns
,"date",len
)==0) col_id
=EXTCOL_Date
;
144 else if (strncasecmp(columns
,"time",len
)==0) col_id
=EXTCOL_Time
;
146 if (strncasecmp(columns
,"c-dns",len
)==0) col_id
=EXTCOL_Ip
;
148 if (strncasecmp(columns
,"cs-uri",len
)==0) col_id
=EXTCOL_Uri
;
150 if (strncasecmp(columns
,"cs-host",len
)==0) col_id
=EXTCOL_Host
;
152 if (strncasecmp(columns
,"sc-bytes",len
)==0) col_id
=EXTCOL_Bytes
;
154 if (strncasecmp(columns
,"sc-status",len
)==0) col_id
=EXTCOL_Status
;
155 } else if (len
==10) {
156 if (strncasecmp(columns
,"time-taken",len
)==0) col_id
=EXTCOL_TimeTaken
;
157 } else if (len
==11) {
158 if (strncasecmp(columns
,"cs-username",len
)==0) col_id
=EXTCOL_UserName
;
159 if (strncasecmp(columns
,"cs-uri-port",len
)==0) col_id
=EXTCOL_Port
;
160 if (strncasecmp(columns
,"cs-uri-path",len
)==0) col_id
=EXTCOL_Path
;
161 } else if (len
==12) {
162 if (strncasecmp(columns
,"cs-uri-query",len
)==0) col_id
=EXTCOL_Query
;
163 } else if (len
==13) {
164 if (strncasecmp(columns
,"cs-uri-scheme",len
)==0) col_id
=EXTCOL_Scheme
;
165 } else if (len
==14) {
166 if (strncasecmp(columns
,"cs(User-Agent)",len
)==0) col_id
=EXTCOL_UserAgent
;
168 if (col_id
!=EXTCOL_Last
) {
174 while (*columns
&& (unsigned char)*columns
<=' ') {
175 if (*columns
!=col_sep
) {
176 debuga(__FILE__
,__LINE__
,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
187 Decode a directive field from the \a Line.
189 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
190 if the line is not a known directive.
192 static enum ReadLogReturnCodeEnum
ExtLog_Directive(const char *Line
)
195 if (strncasecmp(Line
,"Version:",8)==0) return(RLRC_Ignore
);
196 if (strncasecmp(Line
,"Software:",9)==0) return(RLRC_Ignore
);
197 if (strncasecmp(Line
,"Start-Date:",11)==0) return(RLRC_Ignore
);
198 if (strncasecmp(Line
,"End-Date:",9)==0) return(RLRC_Ignore
);
199 if (strncasecmp(Line
,"Date:",5)==0) return(RLRC_Ignore
);
200 if (strncasecmp(Line
,"Remark:",7)==0) return(RLRC_Ignore
);
201 if (strncasecmp(Line
,"Fields:",7)==0) {
203 while (*Line
==' ' || *Line
=='\t') Line
++;
204 if (!ExtLog_Fields(Line
)) return(RLRC_Unknown
);
207 return(RLRC_Unknown
);
211 Get the type of the column \a col_num.
213 \return The type of the column or EXTCOL_Last if
214 the column must be ignored.
216 static enum ext_col_id
ExtLog_WhichColumn(int col_num
)
220 for (i
=0 ; i
<EXTCOL_Last
&& ExtCols
[i
]!=col_num
; i
++);
225 Scan through the string of a column.
227 \param Line The pointer to the beginning of the string.
228 \param col The column number.
230 static char *ExtLog_GetString(char *Line
,int col
,char **End
)
234 //skip opening double quote
242 Line
++;//skip the first quote here, the second is skipped by the other Line++
245 Line
++;//skip closing quote
251 if (*Line
==ExtColSep
[col
]) {
258 if (quote
) return(NULL
);//missing closing quote.
263 Scan through the date in a column.
265 \param Line The pointer to the beginning of the string.
267 static char *ExtLog_GetDate(char *Line
,struct tm
*Date
)
275 //skip opening double quote
278 if (sscanf(Line
,"%d-%d-%d%n",&year
,&month
,&day
,&next
)!=3) return(NULL
);
281 if (*Line
!='\"') return(NULL
);//missing closing quote.
284 Date
->tm_year
=year
-1900;
285 Date
->tm_mon
=month
-1;
291 Scan through the time in a column.
293 \param Line The pointer to the beginning of the string.
295 static char *ExtLog_GetTime(char *Line
,struct tm
*Date
)
303 //skip opening double quote
306 if (sscanf(Line
,"%d:%d:%d%n",&hour
,&minute
,&second
,&next
)!=3) return(NULL
);
309 if (*Line
!='\"') return(NULL
);//missing closing quote.
319 Scan through a number in a column.
321 \param Line The pointer to the beginning of the string.
322 \param Value A variable to store the number.
324 static char *ExtLog_GetLongInt(char *Line
,long int *Value
)
328 //skip opening double quote
332 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
334 if (*Line
!='\"') return(NULL
);//missing closing quote.
341 Scan through a number in a column.
343 \param Line The pointer to the beginning of the string.
344 \param Value A variable to store the number.
346 static char *ExtLog_GetLongLongInt(char *Line
,long long int *Value
)
350 //skip opening double quote
354 while (isdigit(*Line
)) *Value
=*Value
*10+(*Line
++-'0');
356 if (*Line
!='\"') return(NULL
);//missing closing quote.
363 Remove the quotes inside the \a string. If no quotes are known to
364 be in the string, the \a end_ptr is the pointer to the last
365 character of the string.
367 static void ExtLog_FixString(char *string
,char *end_ptr
)
371 if (!string
) return;//string not parsed
372 if (*string
!='\"' && end_ptr
) { //no quotes to remove from the string
377 // remove first quote
379 if (string
[1]!='\"') string
++;
381 // remove the quotes and end at the first unremovable quote
385 if (string
[1]!='\"') break; //closing quote
386 string
++;//skip the first quote
394 * Discard an empty string.
396 * An empty string may contain a single dash.
398 * \param String The string to check.
400 * \return The string pointer if it isn't empty or NULL if the string
403 static const char *ExtLog_FixEmptyString(const char *String
)
405 if (String
&& (String
[0]=='\0' || (String
[0]=='-' && String
[1]=='\0'))) String
=NULL
;
410 * Create the URL from the split elements.
412 static char *ExtLog_ConcatUrl(const char *Scheme
,const char *Host
,const char *Port
,const char *Path
,const char *Query
)
417 Scheme
=ExtLog_FixEmptyString(Scheme
);
418 Host
=ExtLog_FixEmptyString(Host
);
419 if (!Scheme
&& !Host
)
422 * Example of such an entry:
425 * date time time-taken c-ip sc-status s-action sc-bytes cs-bytes cs-method cs-uri-scheme cs-host cs-uri-port cs-uri-path cs-uri-query cs-username cs-auth-group s-hierarchy s-supplier-name rs(Content-Type) cs(Referer) cs(User-Agent) sc-filter-result cs-categories x-virus-id s-ip
426 * 2015-07-29 06:05:50 30 192.168.1.21 400 TCP_NC_MISS 903 8163 unknown - - 0 / - userid - - 10.81.0.26 - - - DENIED "unavailable" - 10.81.0.26 - - ICAP_NOT_SCANNED - - -
428 * It looks like a failed connection attempt to an unavailable resource. Let's assume it is safe to ignore it.
433 Port
=ExtLog_FixEmptyString(Port
);
434 Path
=ExtLog_FixEmptyString(Path
);
435 Query
=ExtLog_FixEmptyString(Query
);
440 if (tlen
+len
+3>=sizeof(ExtTempUrl
))
442 debuga(__FILE__
,__LINE__
,_("URI scheme too long in log file\n"));
445 strcpy(ExtTempUrl
,Scheme
);
446 strcpy(ExtTempUrl
+len
,"://");
453 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
454 strncpy(ExtTempUrl
+tlen
,Host
,len
);
458 if (tlen
+2<sizeof(ExtTempUrl
) && Port
)
461 if (tlen
+len
+1>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-2;
462 ExtTempUrl
[tlen
++]=':';
463 strncpy(ExtTempUrl
+tlen
,Port
,len
);
467 if (tlen
<sizeof(ExtTempUrl
) && Path
)
470 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
471 strncpy(ExtTempUrl
+tlen
,Path
,len
);
475 if (tlen
<sizeof(ExtTempUrl
) && Query
)
478 if (tlen
+len
>=sizeof(ExtTempUrl
)) len
=sizeof(ExtTempUrl
)-tlen
-1;
479 strncpy(ExtTempUrl
+tlen
,Query
,len
);
482 ExtTempUrl
[tlen
]='\0';
487 Read one entry from an extended log.
489 \param Line One line from the input log file.
490 \param Entry Where to store the information parsed from the line.
492 \retval RLRC_NoError One valid entry is parsed.
493 \retval RLRC_Unknown The line is invalid.
494 \retval RLRC_InternalError An internal error was encountered.
496 static enum ReadLogReturnCodeEnum
ExtLog_ReadEntry(char *Line
,struct ReadLogStruct
*Entry
)
499 enum ext_col_id col_id
;
506 char *UrlScheme
=NULL
,*UrlSchemeEnd
;
507 char *UrlHost
=NULL
,*UrlHostEnd
;
508 char *UrlPort
=NULL
,*UrlPortEnd
;
509 char *UrlPath
=NULL
,*UrlPathEnd
;
510 char *UrlQuery
=NULL
,*UrlQueryEnd
;
511 char *UserAgent
=NULL
,*UserAgentEnd
;
515 enum ReadLogReturnCodeEnum status
=ExtLog_Directive(Line
);
516 if (status
!=RLRC_Unknown
) InExtLog
=true;
519 if (!InExtLog
) return(RLRC_Unknown
);
523 if (col
>=ExtColNumber
) {
524 debuga(__FILE__
,__LINE__
,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col
,ExtColNumber
);
525 return(RLRC_Unknown
);
527 col_id
=ExtLog_WhichColumn(col
);
532 Line
=ExtLog_GetString(Line
,col
,&IpEnd
);
533 if (!Line
) return(RLRC_Unknown
);
535 case EXTCOL_UserName
:
536 Entry
->User
=User
=Line
;
537 Line
=ExtLog_GetString(Line
,col
,&UserEnd
);
538 if (!Line
) return(RLRC_Unknown
);
541 Line
=ExtLog_GetDate(Line
,&Entry
->EntryTime
);
542 if (!Line
) return(RLRC_Unknown
);
545 Line
=ExtLog_GetTime(Line
,&Entry
->EntryTime
);
546 if (!Line
) return(RLRC_Unknown
);
548 case EXTCOL_TimeTaken
:
549 Line
=ExtLog_GetLongInt(Line
,&Entry
->ElapsedTime
);
550 if (!Line
) return(RLRC_Unknown
);
553 Line
=ExtLog_GetLongLongInt(Line
,&Entry
->DataSize
);
554 if (!Line
) return(RLRC_Unknown
);
558 Line
=ExtLog_GetString(Line
,col
,&UrlEnd
);
559 if (!Line
) return(RLRC_Unknown
);
563 Line
=ExtLog_GetString(Line
,col
,&UrlSchemeEnd
);
564 if (!Line
) return(RLRC_Unknown
);
568 Line
=ExtLog_GetString(Line
,col
,&UrlHostEnd
);
569 if (!Line
) return(RLRC_Unknown
);
573 Line
=ExtLog_GetString(Line
,col
,&UrlPortEnd
);
574 if (!Line
) return(RLRC_Unknown
);
578 Line
=ExtLog_GetString(Line
,col
,&UrlPathEnd
);
579 if (!Line
) return(RLRC_Unknown
);
583 Line
=ExtLog_GetString(Line
,col
,&UrlQueryEnd
);
584 if (!Line
) return(RLRC_Unknown
);
587 Entry
->HttpCode
=Line
;
588 Line
=ExtLog_GetString(Line
,col
,&HttpCodeEnd
);
589 if (!Line
) return(RLRC_Unknown
);
591 case EXTCOL_UserAgent
:
593 Line
=ExtLog_GetString(Line
,col
,&UserAgentEnd
);
594 if (!Line
) return(RLRC_Unknown
);
596 case EXTCOL_Last
://ignored column
597 Line
=ExtLog_GetString(Line
,col
,NULL
);
598 if (!Line
) return(RLRC_Unknown
);
601 if (*Line
&& *Line
!=ExtColSep
[col
]) return(RLRC_Unknown
);
602 while (*Line
&& *Line
==ExtColSep
[col
]) Line
++;
605 if (col
!=ExtColNumber
) {
606 debuga(__FILE__
,__LINE__
,_("Only %d columns in an extended log file format when %d have been announced\n"),col
,ExtColNumber
);
607 return(RLRC_Unknown
);
610 // check the entry time
611 if (mktime(&Entry
->EntryTime
)==-1) {
612 debuga(__FILE__
,__LINE__
,_("Invalid date or time found in the extended log file\n"));
613 return(RLRC_InternalError
);
616 ExtLog_FixString(Ip
,IpEnd
);
617 ExtLog_FixString(User
,UserEnd
);
618 ExtLog_FixString(Entry
->Url
,UrlEnd
);
619 ExtLog_FixString(Entry
->HttpCode
,HttpCodeEnd
);
622 ExtLog_FixString(UrlScheme
,UrlSchemeEnd
);
623 ExtLog_FixString(UrlHost
,UrlHostEnd
);
624 ExtLog_FixString(UrlPort
,UrlPortEnd
);
625 ExtLog_FixString(UrlPath
,UrlPathEnd
);
626 ExtLog_FixString(UrlQuery
,UrlQueryEnd
);
627 Entry
->Url
=ExtLog_ConcatUrl(UrlScheme
,UrlHost
,UrlPort
,UrlPath
,UrlQuery
);
629 ExtLog_FixString(UserAgent
,UserAgentEnd
);
630 Entry
->UserAgent
=ExtLog_FixEmptyString(UserAgent
);
632 return(RLRC_NoError
);
635 //! \brief Object to read an extended log.
636 const struct ReadLogProcessStruct ReadExtLog
=
638 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
639 N_("extended log format"),