]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog_extlog.c
Strip the user suffix from the redirector log
[thirdparty/sarg.git] / readlog_extlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2015
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"
#include <limits.h>
30
/*!
Upper bound on the number of columns accepted in the "Fields"
directive of an extended log format.

The value itself is arbitrary. It only exists to give the
per-column tables of this file a fixed size.
*/
#define MAX_EXT_COLUMNS 250
38
//! The columns of an extended log this reader knows how to use.
enum ext_col_id {
	EXTCOL_Ip=0,      //!< Client address ("c-ip" or "c-dns" field).
	EXTCOL_UserName,  //!< User name ("cs-username" field).
	EXTCOL_Date,      //!< Date of the entry ("date" field).
	EXTCOL_Time,      //!< Time of the entry ("time" field).
	EXTCOL_TimeTaken, //!< Elapsed time ("time-taken" field).
	EXTCOL_Bytes,     //!< Transferred size ("sc-bytes" field).
	EXTCOL_Uri,       //!< Requested address ("cs-uri" field).
	EXTCOL_Status,    //!< Status code ("sc-status" field).
	EXTCOL_Last       //!< Number of known columns. It must remain the last entry of the list!
};
50
51 //! \c True if the extended common long format is confirmed.
52 static bool InExtLog=false;
53 //! The index of relevant columns in the log file.
54 static int ExtCols[EXTCOL_Last];
55 //! The character to use as a columns separator.
56 static char ExtColSep[MAX_EXT_COLUMNS];
57 //! The number of columns according to the "fields" directive.
58 static int ExtColNumber;
59
60 /*!
61 A new file is being read. The name of the file is \a FileName.
62 */
63 static void ExtLog_NewFile(const char *FileName)
64 {
65 InExtLog=false;
66 ExtColNumber=0;
67 }
68
69 /*!
70 Parse the "Fields" directive listing the columns in the log. The
71 \a columns is a pointer to the first column of the directive.
72
73 \return \c True if the fields is valid or false if it could not
74 be decoded.
75 */
76 static bool ExtLog_Fields(const char *columns)
77 {
78 int col;
79 int len;
80 int prefix;
81 int header_start;
82 int header_end;
83 int i;
84 enum ext_col_id col_id;
85 char col_sep;
86 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
87 const char const *prefixes[]=
88 {
89 "c",
90 "s",
91 "r",
92 "cs",
93 "sc",
94 "sr",
95 "rs",
96 "x",
97 };
98
99 for (i=0 ; i<EXTCOL_Last ; i++) ExtCols[i]=-1;
100
101 col=0;
102 while (*columns) {
103 if (col>=MAX_EXT_COLUMNS) {
104 debuga(__FILE__,__LINE__,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS);
105 exit(EXIT_FAILURE);
106 }
107 prefix=-1;
108 header_start=-1;
109 header_end=-1;
110 for (i=sizeof(prefixes)/sizeof(*prefixes)-1 ; i>=0 ; i--) {
111 len=strlen(prefixes[i]);
112 if (strncasecmp(columns,prefixes[i],len)==0) {
113 if (columns[len]=='-') {
114 prefix=len++;
115 break;
116 } else if (columns[len]=='(') {
117 header_start=len++;
118 break;
119 }
120 }
121 }
122 (void)prefix;//compiler pacifier
123 if (i<0) len=0;
124 for ( ; (unsigned char)columns[len]>' ' ; len++) {//skip a word and accept any separator (tab or space)
125 if (header_start>=0 && columns[len]==')') header_end=len;
126 }
127 (void)header_end;//compiler pacifier
128 col_sep=columns[len];
129 ExtColSep[col]=col_sep;
130
131 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
132 col_id=EXTCOL_Last;
133 if (len==4) {
134 if (strncasecmp(columns,"c-ip",len)==0 && ExtCols[EXTCOL_Ip]<0) col_id=EXTCOL_Ip;
135 else if (strncasecmp(columns,"date",len)==0) col_id=EXTCOL_Date;
136 else if (strncasecmp(columns,"time",len)==0) col_id=EXTCOL_Time;
137 } else if (len==5) {
138 if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip;
139 } else if (len==6) {
140 if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri;
141 } else if (len==8) {
142 if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes;
143 } else if (len==9) {
144 if (strncasecmp(columns,"sc-status",len)==0) col_id=EXTCOL_Status;
145 } else if (len==10) {
146 if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken;
147 } else if (len==11) {
148 if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName;
149 }
150 if (col_id!=EXTCOL_Last) {
151 ExtCols[col_id]=col;
152 }
153
154 col++;
155 columns+=len;
156 while (*columns && (unsigned char)*columns<=' ') {
157 if (*columns!=col_sep) {
158 debuga(__FILE__,__LINE__,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
159 exit(EXIT_FAILURE);
160 }
161 columns++;
162 }
163 }
164 ExtColNumber=col;
165 return(true);
166 }
167
168 /*!
169 Decode a directive field from the \a Line.
170
171 \return RLRC_Ignore if the line is a directive or RLRC_Unknown
172 if the line is not a known directive.
173 */
174 static enum ReadLogReturnCodeEnum ExtLog_Directive(const char *Line)
175 {
176 ++Line;
177 if (strncasecmp(Line,"Version:",8)==0) return(RLRC_Ignore);
178 if (strncasecmp(Line,"Software:",9)==0) return(RLRC_Ignore);
179 if (strncasecmp(Line,"Start-Date:",11)==0) return(RLRC_Ignore);
180 if (strncasecmp(Line,"End-Date:",9)==0) return(RLRC_Ignore);
181 if (strncasecmp(Line,"Date:",5)==0) return(RLRC_Ignore);
182 if (strncasecmp(Line,"Remark:",7)==0) return(RLRC_Ignore);
183 if (strncasecmp(Line,"Fields:",7)==0) {
184 Line+=7;
185 while (*Line==' ' || *Line=='\t') Line++;
186 if (!ExtLog_Fields(Line)) return(RLRC_Unknown);
187 return(RLRC_Ignore);
188 }
189 return(RLRC_Unknown);
190 }
191
192 /*!
193 Get the type of the column \a col_num.
194
195 \return The type of the column or EXTCOL_Last if
196 the column must be ignored.
197 */
198 static enum ext_col_id ExtLog_WhichColumn(int col_num)
199 {
200 int i;
201
202 for (i=0 ; i<EXTCOL_Last && ExtCols[i]!=col_num ; i++);
203 return(i);
204 }
205
206 /*!
207 Scan through the string of a column.
208
209 \param Line The pointer to the beginning of the string.
210 \param col The column number.
211 */
212 static char *ExtLog_GetString(char *Line,int col,char **End)
213 {
214 bool quote;
215 bool dequote;
216
217 //skip opening double quote
218 quote=(*Line=='\"');
219 if (quote) ++Line;
220
221 dequote=false;
222 while (*Line) {
223 if (quote) {
224 if (*Line=='\"') {
225 if (Line[1]!='\"') {
226 if (End) *End=(dequote) ? NULL : Line;
227 Line++;//skip the closing quote
228 quote=false;
229 break;
230 }
231 dequote=true;
232 }
233 } else {
234 if (*Line==ExtColSep[col]) {
235 if (End) *End=Line;
236 break;
237 }
238 }
239 Line++;
240 }
241 if (quote) return(NULL);//missing closing quote.
242 return(Line);
243 }
244
/*!
Scan through the date in a column. The date is made of three numbers
separated by dashes (year-month-day) and may be enclosed in double quotes.

\param Line The pointer to the beginning of the string.
\param Date The structure whose year, month and day are set. It is left
untouched if the date cannot be read.

\return The pointer to the first character after the date or NULL if
the date is invalid or the closing quote is missing.
*/
static char *ExtLog_GetDate(char *Line,struct tm *Date)
{
	const bool quoted=(*Line=='\"');
	int y;
	int m;
	int d;
	int consumed;

	if (quoted) Line++;//skip opening double quote

	if (sscanf(Line,"%d-%d-%d%n",&y,&m,&d,&consumed)!=3) return(NULL);
	Line+=consumed;

	//a quoted date must be closed immediately after the day
	if (quoted && *Line++!='\"') return(NULL);

	Date->tm_mday=d;
	Date->tm_mon=m-1;
	Date->tm_year=y-1900;
	return(Line);
}
272
/*!
Scan through the time in a column. The time is made of three numbers
separated by colons (hour:minute:second) and may be enclosed in double quotes.

\param Line The pointer to the beginning of the string.
\param Date The structure whose hour, minute and second are set. It is
left untouched if the time cannot be read.

\return The pointer to the first character after the time or NULL if
the time is invalid or the closing quote is missing.
*/
static char *ExtLog_GetTime(char *Line,struct tm *Date)
{
	const bool quoted=(*Line=='\"');
	int h;
	int m;
	int s;
	int consumed;

	if (quoted) Line++;//skip opening double quote

	if (sscanf(Line,"%d:%d:%d%n",&h,&m,&s,&consumed)!=3) return(NULL);
	Line+=consumed;

	//a quoted time must be closed immediately after the seconds
	if (quoted && *Line++!='\"') return(NULL);

	Date->tm_sec=s;
	Date->tm_min=m;
	Date->tm_hour=h;
	return(Line);
}
300
301 /*!
302 Scan through a number in a column.
303
304 \param Line The pointer to the beginning of the string.
305 \param Value A variable to store the number.
306 */
307 static char *ExtLog_GetLongInt(char *Line,long int *Value)
308 {
309 bool quote;
310
311 //skip opening double quote
312 quote=(*Line=='\"');
313 if (quote) ++Line;
314 *Value=0;
315 while (isdigit(*Line)) *Value=*Value*10+(*Line++-'0');
316 if (quote) {
317 if (*Line!='\"') return(NULL);//missing closing quote.
318 ++Line;
319 }
320 return(Line);
321 }
322
323 /*!
324 Scan through a number in a column.
325
326 \param Line The pointer to the beginning of the string.
327 \param Value A variable to store the number.
328 */
329 static char *ExtLog_GetLongLongInt(char *Line,long long int *Value)
330 {
331 bool quote;
332
333 //skip opening double quote
334 quote=(*Line=='\"');
335 if (quote) ++Line;
336 *Value=0;
337 while (isdigit(*Line)) *Value=*Value*10+(*Line++-'0');
338 if (quote) {
339 if (*Line!='\"') return(NULL);//missing closing quote.
340 ++Line;
341 }
342 return(Line);
343 }
344
/*!
Terminate a string in place and remove the doubled quotes it may contain.

\param string The beginning of the string. Nothing is done if it is NULL.
\param end_ptr If it is not NULL, the string is known to be free of
doubled quotes and it is merely terminated at that position. If it is
NULL, every pair of consecutive double quotes is replaced by one quote
and the string is terminated at the first double quote that is not
followed by another one, or at its current end.
*/
static void ExtLog_FixString(char *string,char *end_ptr)
{
	char *src;
	char *out;

	if (string==NULL) return;//string not parsed

	if (end_ptr!=NULL) {
		//the end is known and nothing has to be removed
		*end_ptr='\0';
		return;
	}

	// compact the string over the removed quotes
	out=string;
	for (src=string ; *src!='\0' ; src++) {
		if (*src=='\"') {
			if (src[1]!='\"') break;//closing quote
			src++;//keep only the second quote of the pair
		}
		*out++=*src;
	}
	*out='\0';
}
371
372 /*!
373 Read one entry from an extended log.
374
375 \param Line One line from the input log file.
376 \param Entry Where to store the information parsed from the line.
377
378 \retval RLRC_NoError One valid entry is parsed.
379 \retval RLRC_Unknown The line is invalid.
380 \retval RLRC_InternalError An internal error was encountered.
381 */
382 static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry)
383 {
384 int col;
385 enum ext_col_id col_id;
386 char *Ip=NULL;
387 char *IpEnd;
388 char *UserEnd;
389 char *UrlEnd;
390 char *HttpCodeEnd;
391
392 // is it a directive
393 if (*Line=='#') {
394 enum ReadLogReturnCodeEnum status=ExtLog_Directive(Line);
395 if (status!=RLRC_Unknown) InExtLog=true;
396 return(status);
397 }
398 if (!InExtLog) return(RLRC_Unknown);
399
400 col=0;
401 while (*Line) {
402 if (col>=ExtColNumber) {
403 debuga(__FILE__,__LINE__,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col,ExtColNumber);
404 return(RLRC_Unknown);
405 }
406 col_id=ExtLog_WhichColumn(col);
407 switch (col_id)
408 {
409 case EXTCOL_Ip:
410 Entry->Ip=Ip=Line;
411 Line=ExtLog_GetString(Line,col,&IpEnd);
412 if (!Line) return(RLRC_Unknown);
413 break;
414 case EXTCOL_UserName:
415 Entry->User=Line;
416 Line=ExtLog_GetString(Line,col,&UserEnd);
417 if (!Line) return(RLRC_Unknown);
418 break;
419 case EXTCOL_Date:
420 Line=ExtLog_GetDate(Line,&Entry->EntryTime);
421 if (!Line) return(RLRC_Unknown);
422 break;
423 case EXTCOL_Time:
424 Line=ExtLog_GetTime(Line,&Entry->EntryTime);
425 if (!Line) return(RLRC_Unknown);
426 break;
427 case EXTCOL_TimeTaken:
428 Line=ExtLog_GetLongInt(Line,&Entry->ElapsedTime);
429 if (!Line) return(RLRC_Unknown);
430 break;
431 case EXTCOL_Bytes:
432 Line=ExtLog_GetLongLongInt(Line,&Entry->DataSize);
433 if (!Line) return(RLRC_Unknown);
434 break;
435 case EXTCOL_Uri:
436 Entry->Url=Line;
437 Line=ExtLog_GetString(Line,col,&UrlEnd);
438 if (!Line) return(RLRC_Unknown);
439 break;
440 case EXTCOL_Status:
441 Entry->HttpCode=Line;
442 Line=ExtLog_GetString(Line,col,&HttpCodeEnd);
443 if (!Line) return(RLRC_Unknown);
444 break;
445 case EXTCOL_Last://ignored column
446 Line=ExtLog_GetString(Line,col,NULL);
447 if (!Line) return(RLRC_Unknown);
448 break;
449 }
450 if (*Line && *Line!=ExtColSep[col]) return(RLRC_Unknown);
451 while (*Line && *Line==ExtColSep[col]) Line++;
452 col++;
453 }
454 if (col!=ExtColNumber) {
455 debuga(__FILE__,__LINE__,_("Only %d columns in an extended log file format when %d have been announced\n"),col,ExtColNumber);
456 return(RLRC_Unknown);
457 }
458
459 // check the entry time
460 if (mktime(&Entry->EntryTime)==-1) {
461 debuga(__FILE__,__LINE__,_("Invalid date or time found in the extended log file\n"));
462 return(RLRC_InternalError);
463 }
464
465 ExtLog_FixString(Ip,IpEnd);
466 ExtLog_FixString(Entry->User,UserEnd);
467 ExtLog_FixString(Entry->Url,UrlEnd);
468 ExtLog_FixString(Entry->HttpCode,HttpCodeEnd);
469
470 return(RLRC_NoError);
471 }
472
473 //! \brief Object to read an extended log.
474 const struct ReadLogProcessStruct ReadExtLog=
475 {
476 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
477 N_("extended log format"),
478 ExtLog_NewFile,
479 ExtLog_ReadEntry
480 };