]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog_extlog.c
Rename configure.in as configure.ac
[thirdparty/sarg.git] / readlog_extlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2015
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30
/*!
Upper bound on the number of columns an extended log format may declare.

The value is arbitrary. It only exists to give a fixed size to the
per-column table.
*/
#define MAX_EXT_COLUMNS 250

//! The columns this reader extracts from an extended log.
enum ext_col_id {
	EXTCOL_Ip,        //!< "c-ip" or "c-dns"
	EXTCOL_UserName,  //!< "cs-username"
	EXTCOL_Date,      //!< "date"
	EXTCOL_Time,      //!< "time"
	EXTCOL_TimeTaken, //!< "time-taken"
	EXTCOL_Bytes,     //!< "sc-bytes"
	EXTCOL_Uri,       //!< "cs-uri"
	EXTCOL_Status,    //!< "sc-status"
	EXTCOL_Last       //!< number of identifiers, it must remain the last entry of the list
};

//! \c True once a directive of the extended log format has been recognized in the current file.
static bool InExtLog=false;
//! For each ::ext_col_id, the index of the corresponding column in the log file or -1 if the log has no such column.
static int ExtCols[EXTCOL_Last];
//! For each column of the log, the character found after its name in the "fields" directive and used as its separator.
static char ExtColSep[MAX_EXT_COLUMNS];
//! The number of columns according to the "fields" directive.
static int ExtColNumber;
59
60 /*!
61 A new file is being read. The name of the file is \a FileName.
62 */
63 static void ExtLog_NewFile(const char *FileName)
64 {
65 InExtLog=false;
66 ExtColNumber=0;
67 }
68
69 /*!
70 Parse the "Fields" directive listing the columns in the log. The
71 \a columns is a pointer to the first column of the directive.
72
73 \return \c True if the fields is valid or false if it could not
74 be decoded.
75 */
76 static bool ExtLog_Fields(const char *columns)
77 {
78 int col;
79 int len;
80 int prefix;
81 int header_start;
82 int header_end;
83 int i;
84 enum ext_col_id col_id;
85 char col_sep;
86 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
87 const char const *prefixes[]=
88 {
89 "c",
90 "s",
91 "r",
92 "cs",
93 "sc",
94 "sr",
95 "rs",
96 "x",
97 };
98
99 for (i=0 ; i<EXTCOL_Last ; i++) ExtCols[i]=-1;
100
101 col=0;
102 while (*columns) {
103 if (col>=MAX_EXT_COLUMNS) {
104 debuga(__FILE__,__LINE__,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS);
105 exit(EXIT_FAILURE);
106 }
107 prefix=-1;
108 header_start=-1;
109 header_end=-1;
110 for (i=sizeof(prefixes)/sizeof(*prefixes)-1 ; i>=0 ; i--) {
111 len=strlen(prefixes[i]);
112 if (strncasecmp(columns,prefixes[i],len)==0) {
113 if (columns[len]=='-') {
114 prefix=len++;
115 break;
116 } else if (columns[len]=='(') {
117 header_start=len++;
118 break;
119 }
120 }
121 }
122 (void)prefix;//compiler pacifier
123 if (i<0) len=0;
124 for ( ; (unsigned char)columns[len]>' ' ; len++) {//skip a word and accept any separator (tab or space)
125 if (header_start>=0 && columns[len]==')') header_end=len;
126 }
127 (void)header_end;//compiler pacifier
128 col_sep=columns[len];
129 ExtColSep[col]=col_sep;
130
131 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
132 col_id=EXTCOL_Last;
133 if (len==4) {
134 if (strncasecmp(columns,"c-ip",len)==0 && ExtCols[EXTCOL_Ip]<0) col_id=EXTCOL_Ip;
135 else if (strncasecmp(columns,"date",len)==0) col_id=EXTCOL_Date;
136 else if (strncasecmp(columns,"time",len)==0) col_id=EXTCOL_Time;
137 } else if (len==5) {
138 if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip;
139 } else if (len==6) {
140 if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri;
141 } else if (len==8) {
142 if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes;
143 } else if (len==9) {
144 if (strncasecmp(columns,"sc-status",len)==0) col_id=EXTCOL_Status;
145 } else if (len==10) {
146 if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken;
147 } else if (len==11) {
148 if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName;
149 }
150 if (col_id!=EXTCOL_Last) {
151 ExtCols[col_id]=col;
152 }
153
154 col++;
155 columns+=len;
156 while (*columns && (unsigned char)*columns<=' ') {
157 if (*columns!=col_sep) {
158 debuga(__FILE__,__LINE__,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
159 exit(EXIT_FAILURE);
160 }
161 columns++;
162 }
163 }
164 ExtColNumber=col;
165 return(true);
166 }
167
168 /*!
169 Decode a directive field from the \a Line.
170
171 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
172 if the line is not a known directive.
173 */
174 static enum ReadLogReturnCodeEnum ExtLog_Directive(const char *Line)
175 {
176 ++Line;
177 if (strncasecmp(Line,"Version:",8)==0) return(RLRC_Ignore);
178 if (strncasecmp(Line,"Software:",9)==0) return(RLRC_Ignore);
179 if (strncasecmp(Line,"Start-Date:",11)==0) return(RLRC_Ignore);
180 if (strncasecmp(Line,"End-Date:",9)==0) return(RLRC_Ignore);
181 if (strncasecmp(Line,"Date:",5)==0) return(RLRC_Ignore);
182 if (strncasecmp(Line,"Remark:",7)==0) return(RLRC_Ignore);
183 if (strncasecmp(Line,"Fields:",7)==0) {
184 Line+=7;
185 while (*Line==' ' || *Line=='\t') Line++;
186 if (!ExtLog_Fields(Line)) return(RLRC_Unknown);
187 return(RLRC_Ignore);
188 }
189 return(RLRC_Unknown);
190 }
191
192 /*!
193 Get the type of the column \a col_num.
194
195 \return The type of the column or EXTCOL_Last if
196 the column must be ignored.
197 */
198 static enum ext_col_id ExtLog_WhichColumn(int col_num)
199 {
200 int i;
201
202 for (i=0 ; i<EXTCOL_Last && ExtCols[i]!=col_num ; i++);
203 return(i);
204 }
205
206 /*!
207 Scan through the string of a column.
208
209 \param Line The pointer to the beginning of the string.
210 \param col The column number.
211 */
212 static char *ExtLog_GetString(char *Line,int col,char **End)
213 {
214 bool quote;
215 bool dequote;
216
217 //skip opening double quote
218 quote=(*Line=='\"');
219 if (quote) ++Line;
220
221 dequote=false;
222 while (*Line) {
223 if (quote) {
224 if (*Line=='\"') {
225 if (Line[1]!='\"') {
226 if (End) *End=(dequote) ? NULL : Line;
227 Line++;//skip the closing quote
228 quote=false;
229 break;
230 }
231 dequote=true;
232 }
233 } else {
234 if (*Line==ExtColSep[col]) {
235 if (End) *End=Line;
236 break;
237 }
238 }
239 Line++;
240 }
241 if (quote) return(NULL);//missing closing quote.
242 return(Line);
243 }
244
/*!
Scan through the date in a column. The date is made of three
numbers (year, month and day) separated by dashes and may be
enclosed in double quotes.

\param Line The pointer to the beginning of the string.
\param Date The structure whose year, month and day are set from the column.
It is not modified if the function fails.

\return The pointer to the character after the date or NULL if the
date could not be read or if the closing quote is missing.
*/
static char *ExtLog_GetDate(char *Line,struct tm *Date)
{
	int ymd[3];
	int consumed;
	const bool quoted=(Line[0]=='\"');

	if (quoted) Line++;//skip opening double quote

	if (sscanf(Line,"%d-%d-%d%n",&ymd[0],&ymd[1],&ymd[2],&consumed)!=3) return(NULL);
	Line+=consumed;

	if (quoted) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		Line++;
	}

	// struct tm counts the years from 1900 and the months from zero
	Date->tm_mday=ymd[2];
	Date->tm_mon=ymd[1]-1;
	Date->tm_year=ymd[0]-1900;
	return(Line);
}
272
/*!
Scan through the time in a column. The time is made of three
numbers (hour, minute and second) separated by colons and may be
enclosed in double quotes.

\param Line The pointer to the beginning of the string.
\param Date The structure whose hour, minute and second are set from the column.
It is not modified if the function fails.

\return The pointer to the character after the time or NULL if the
time could not be read or if the closing quote is missing.
*/
static char *ExtLog_GetTime(char *Line,struct tm *Date)
{
	int hms[3];
	int consumed;
	const bool quoted=(Line[0]=='\"');

	if (quoted) Line++;//skip opening double quote

	if (sscanf(Line,"%d:%d:%d%n",&hms[0],&hms[1],&hms[2],&consumed)!=3) return(NULL);
	Line+=consumed;

	if (quoted) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		Line++;
	}

	Date->tm_sec=hms[2];
	Date->tm_min=hms[1];
	Date->tm_hour=hms[0];
	return(Line);
}
300
/*!
Scan through a number in a column. The number is a sequence of decimal
digits that may be enclosed in double quotes. No sign is accepted.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number. It is set to zero if
no digit is found and to LONG_MAX if the number is too big to be stored.

\return The pointer to the character after the number (after the closing
quote if the number is quoted) or NULL if the closing quote is missing.
*/
static char *ExtLog_GetLongInt(char *Line,long int *Value)
{
	bool quote;
	char *digits_end;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;

	*Value=0;
	// the cast is required as the ctype functions are not defined for negative characters.
	// Checking the first digit here prevents strtol from accepting blanks or a sign.
	if (isdigit((unsigned char)*Line)) {
		*Value=strtol(Line,&digits_end,10);//saturate instead of overflowing
		Line=digits_end;
	}

	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
322
/*!
Scan through a big number in a column. The number is a sequence of decimal
digits that may be enclosed in double quotes. No sign is accepted.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number. It is set to zero if
no digit is found and to LLONG_MAX if the number is too big to be stored.

\return The pointer to the character after the number (after the closing
quote if the number is quoted) or NULL if the closing quote is missing.
*/
static char *ExtLog_GetLongLongInt(char *Line,long long int *Value)
{
	bool quote;
	char *digits_end;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;

	*Value=0;
	// the cast is required as the ctype functions are not defined for negative characters.
	// Checking the first digit here prevents strtoll from accepting blanks or a sign.
	if (isdigit((unsigned char)*Line)) {
		*Value=strtoll(Line,&digits_end,10);//saturate instead of overflowing
		Line=digits_end;
	}

	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
344
/*!
Terminate the \a string in place and, if necessary, remove the
doubled quotes it contains.

\param string The first character of the string or NULL if the
string was not parsed, in which case nothing is done.
\param end_ptr If no quotes are known to be in the string, the pointer
to the character following the last character of the string. That
character is replaced by the string terminator. If it is NULL, every pair
of double quotes in the string is replaced by one double quote and the string
is ended at the first double quote that is not doubled.
*/
static void ExtLog_FixString(char *string,char *end_ptr)
{
	const char *src;
	char *out;

	if (string==NULL) return;//string not parsed

	if (end_ptr!=NULL) {
		//end is known and no quotes are in the string
		*end_ptr='\0';
		return;
	}

	// compact the string over itself: the output never gets ahead of the input
	out=string;
	for (src=string ; *src!='\0' ; src++) {
		if (*src=='\"') {
			if (src[1]!='\"') break;//closing quote
			src++;//keep only the second quote of the pair
		}
		*out++=*src;
	}
	*out='\0';
}
371
372 /*!
373 Read one entry from an extended log.
374
375 \param Line One line from the input log file.
376 \param Entry Where to store the information parsed from the line.
377
378 \retval RLRC_NoError One valid entry is parsed.
379 \retval RLRC_Unknown The line is invalid.
380 \retval RLRC_InternalError An internal error was encountered.
381 */
382 static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry)
383 {
384 int col;
385 enum ext_col_id col_id;
386 char *Ip=NULL;
387 char *IpEnd;
388 char *User=NULL;
389 char *UserEnd;
390 char *UrlEnd;
391 char *HttpCodeEnd;
392
393 // is it a directive
394 if (*Line=='#') {
395 enum ReadLogReturnCodeEnum status=ExtLog_Directive(Line);
396 if (status!=RLRC_Unknown) InExtLog=true;
397 return(status);
398 }
399 if (!InExtLog) return(RLRC_Unknown);
400
401 col=0;
402 while (*Line) {
403 if (col>=ExtColNumber) {
404 debuga(__FILE__,__LINE__,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col,ExtColNumber);
405 return(RLRC_Unknown);
406 }
407 col_id=ExtLog_WhichColumn(col);
408 switch (col_id)
409 {
410 case EXTCOL_Ip:
411 Entry->Ip=Ip=Line;
412 Line=ExtLog_GetString(Line,col,&IpEnd);
413 if (!Line) return(RLRC_Unknown);
414 break;
415 case EXTCOL_UserName:
416 Entry->User=User=Line;
417 Line=ExtLog_GetString(Line,col,&UserEnd);
418 if (!Line) return(RLRC_Unknown);
419 break;
420 case EXTCOL_Date:
421 Line=ExtLog_GetDate(Line,&Entry->EntryTime);
422 if (!Line) return(RLRC_Unknown);
423 break;
424 case EXTCOL_Time:
425 Line=ExtLog_GetTime(Line,&Entry->EntryTime);
426 if (!Line) return(RLRC_Unknown);
427 break;
428 case EXTCOL_TimeTaken:
429 Line=ExtLog_GetLongInt(Line,&Entry->ElapsedTime);
430 if (!Line) return(RLRC_Unknown);
431 break;
432 case EXTCOL_Bytes:
433 Line=ExtLog_GetLongLongInt(Line,&Entry->DataSize);
434 if (!Line) return(RLRC_Unknown);
435 break;
436 case EXTCOL_Uri:
437 Entry->Url=Line;
438 Line=ExtLog_GetString(Line,col,&UrlEnd);
439 if (!Line) return(RLRC_Unknown);
440 break;
441 case EXTCOL_Status:
442 Entry->HttpCode=Line;
443 Line=ExtLog_GetString(Line,col,&HttpCodeEnd);
444 if (!Line) return(RLRC_Unknown);
445 break;
446 case EXTCOL_Last://ignored column
447 Line=ExtLog_GetString(Line,col,NULL);
448 if (!Line) return(RLRC_Unknown);
449 break;
450 }
451 if (*Line && *Line!=ExtColSep[col]) return(RLRC_Unknown);
452 while (*Line && *Line==ExtColSep[col]) Line++;
453 col++;
454 }
455 if (col!=ExtColNumber) {
456 debuga(__FILE__,__LINE__,_("Only %d columns in an extended log file format when %d have been announced\n"),col,ExtColNumber);
457 return(RLRC_Unknown);
458 }
459
460 // check the entry time
461 if (mktime(&Entry->EntryTime)==-1) {
462 debuga(__FILE__,__LINE__,_("Invalid date or time found in the extended log file\n"));
463 return(RLRC_InternalError);
464 }
465
466 ExtLog_FixString(Ip,IpEnd);
467 ExtLog_FixString(User,UserEnd);
468 ExtLog_FixString(Entry->Url,UrlEnd);
469 ExtLog_FixString(Entry->HttpCode,HttpCodeEnd);
470
471 return(RLRC_NoError);
472 }
473
474 //! \brief Object to read an extended log.
475 const struct ReadLogProcessStruct ReadExtLog=
476 {
477 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
478 N_("extended log format"),
479 ExtLog_NewFile,
480 ExtLog_ReadEntry
481 };