]> git.ipfire.org Git - thirdparty/sarg.git/blame - readlog_extlog.c
Strip a suffix from the user name
[thirdparty/sarg.git] / readlog_extlog.c
CommitLineData
1c91da07
FM
/*
 * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
 *                                                            1998, 2015
 *
 * SARG donations:
 *      please look at http://sarg.sourceforge.net/donations.php
 * Support:
 *     http://sourceforge.net/projects/sarg/forums/forum/363374
 * ---------------------------------------------------------------------
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111, USA.
 *
 */
26
#include "include/conf.h"
#include "include/defs.h"
#include "include/readlog.h"

#include <limits.h>
1c91da07 30
86d99d08
FM
/*!
Maximum number of columns accepted in an extended log format.

The current value is an arbitrary number chosen to have an
actual limit. It is the size of the table storing the separator
of every column.
*/
#define MAX_EXT_COLUMNS 250
38
//! The columns of an extended log whose content is used to fill an entry.
enum ext_col_id {
	EXTCOL_Ip,        //!< Client address ("c-ip" or "c-dns").
	EXTCOL_UserName,  //!< User name ("cs-username").
	EXTCOL_Date,      //!< Date of the entry ("date").
	EXTCOL_Time,      //!< Time of the entry ("time").
	EXTCOL_TimeTaken, //!< Duration of the request ("time-taken").
	EXTCOL_Bytes,     //!< Number of bytes ("sc-bytes").
	EXTCOL_Uri,       //!< Requested URI ("cs-uri").
	EXTCOL_Status,    //!< Status code ("sc-status").
	EXTCOL_Last //last entry of the list !
};
50
51//! \c True if the extended common long format is confirmed.
52static bool InExtLog=false;
53//! The index of relevant columns in the log file.
54static int ExtCols[EXTCOL_Last];
55//! The character to use as a columns separator.
56static char ExtColSep[MAX_EXT_COLUMNS];
57//! The number of columns according to the "fields" directive.
58static int ExtColNumber;
59
1c91da07
FM
60/*!
61A new file is being read. The name of the file is \a FileName.
62*/
63static void ExtLog_NewFile(const char *FileName)
64{
86d99d08
FM
65 InExtLog=false;
66 ExtColNumber=0;
67}
68
69/*!
70Parse the "Fields" directive listing the columns in the log. The
71\a columns is a pointer to the first column of the directive.
72
73\return \c True if the fields is valid or false if it could not
74be decoded.
75*/
76static bool ExtLog_Fields(const char *columns)
77{
78 int col;
79 int len;
80 int prefix;
81 int header_start;
82 int header_end;
83 int i;
84 enum ext_col_id col_id;
85 char col_sep;
86 // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes
87 const char const *prefixes[]=
88 {
89 "c",
90 "s",
91 "r",
92 "cs",
93 "sc",
94 "sr",
95 "rs",
96 "x",
97 };
98
99 for (i=0 ; i<EXTCOL_Last ; i++) ExtCols[i]=-1;
100
101 col=0;
102 while (*columns) {
103 if (col>=MAX_EXT_COLUMNS) {
af961877 104 debuga(__FILE__,__LINE__,_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS);
86d99d08
FM
105 exit(EXIT_FAILURE);
106 }
107 prefix=-1;
108 header_start=-1;
109 header_end=-1;
110 for (i=sizeof(prefixes)/sizeof(*prefixes)-1 ; i>=0 ; i--) {
111 len=strlen(prefixes[i]);
112 if (strncasecmp(columns,prefixes[i],len)==0) {
113 if (columns[len]=='-') {
114 prefix=len++;
115 break;
116 } else if (columns[len]=='(') {
117 header_start=len++;
118 break;
119 }
120 }
121 }
40cd345e 122 (void)prefix;//compiler pacifier
86d99d08
FM
123 if (i<0) len=0;
124 for ( ; (unsigned char)columns[len]>' ' ; len++) {//skip a word and accept any separator (tab or space)
125 if (header_start>=0 && columns[len]==')') header_end=len;
126 }
40cd345e 127 (void)header_end;//compiler pacifier
86d99d08
FM
128 col_sep=columns[len];
129 ExtColSep[col]=col_sep;
130
131 // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers
132 col_id=EXTCOL_Last;
133 if (len==4) {
134 if (strncasecmp(columns,"c-ip",len)==0 && ExtCols[EXTCOL_Ip]<0) col_id=EXTCOL_Ip;
135 else if (strncasecmp(columns,"date",len)==0) col_id=EXTCOL_Date;
136 else if (strncasecmp(columns,"time",len)==0) col_id=EXTCOL_Time;
137 } else if (len==5) {
138 if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip;
139 } else if (len==6) {
140 if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri;
141 } else if (len==8) {
142 if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes;
143 } else if (len==9) {
144 if (strncasecmp(columns,"sc-status",len)==0) col_id=EXTCOL_Status;
145 } else if (len==10) {
146 if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken;
147 } else if (len==11) {
148 if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName;
149 }
150 if (col_id!=EXTCOL_Last) {
151 ExtCols[col_id]=col;
152 }
153
154 col++;
155 columns+=len;
156 while (*columns && (unsigned char)*columns<=' ') {
157 if (*columns!=col_sep) {
af961877 158 debuga(__FILE__,__LINE__,_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n"));
86d99d08
FM
159 exit(EXIT_FAILURE);
160 }
161 columns++;
162 }
163 }
164 ExtColNumber=col;
165 return(true);
166}
167
168/*!
169Decode a directive field from the \a Line.
170
171\return RLRC_Ignore if the line is a directive or RLRC_Unknown
172if the line is not a known directive.
173*/
174static enum ReadLogReturnCodeEnum ExtLog_Directive(const char *Line)
175{
176 ++Line;
177 if (strncasecmp(Line,"Version:",8)==0) return(RLRC_Ignore);
178 if (strncasecmp(Line,"Software:",9)==0) return(RLRC_Ignore);
179 if (strncasecmp(Line,"Start-Date:",11)==0) return(RLRC_Ignore);
180 if (strncasecmp(Line,"End-Date:",9)==0) return(RLRC_Ignore);
181 if (strncasecmp(Line,"Date:",5)==0) return(RLRC_Ignore);
182 if (strncasecmp(Line,"Remark:",7)==0) return(RLRC_Ignore);
183 if (strncasecmp(Line,"Fields:",7)==0) {
184 Line+=7;
185 while (*Line==' ' || *Line=='\t') Line++;
186 if (!ExtLog_Fields(Line)) return(RLRC_Unknown);
187 return(RLRC_Ignore);
188 }
189 return(RLRC_Unknown);
190}
191
192/*!
193Get the type of the column \a col_num.
194
195\return The type of the column or EXTCOL_Last if
196the column must be ignored.
197*/
198static enum ext_col_id ExtLog_WhichColumn(int col_num)
199{
200 int i;
201
202 for (i=0 ; i<EXTCOL_Last && ExtCols[i]!=col_num ; i++);
203 return(i);
204}
205
206/*!
207Scan through the string of a column.
208
209\param Line The pointer to the beginning of the string.
210\param col The column number.
211*/
212static char *ExtLog_GetString(char *Line,int col,char **End)
213{
214 bool quote;
215 bool dequote;
216
217 //skip opening double quote
218 quote=(*Line=='\"');
219 if (quote) ++Line;
220
221 dequote=false;
222 while (*Line) {
223 if (quote) {
224 if (*Line=='\"') {
225 if (Line[1]!='\"') {
226 if (End) *End=(dequote) ? NULL : Line;
227 Line++;//skip the closing quote
228 quote=false;
229 break;
230 }
231 dequote=true;
232 }
233 } else {
234 if (*Line==ExtColSep[col]) {
235 if (End) *End=Line;
236 break;
237 }
238 }
239 Line++;
240 }
241 if (quote) return(NULL);//missing closing quote.
242 return(Line);
243}
244
/*!
Scan through the date in a column. The date is formatted as
year-month-day and may be enclosed in double quotes.

\param Line The pointer to the beginning of the string.
\param Date The structure whose year, month and day are set from
the date. It is left untouched if the function fails.

\return The pointer to the character following the date (after the
closing quote if any) or NULL if the date is invalid.
*/
static char *ExtLog_GetDate(char *Line,struct tm *Date)
{
	bool quote;
	int year;
	int month;
	int day;
	int next;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	if (sscanf(Line,"%d-%d-%d%n",&year,&month,&day,&next)!=3) return(NULL);
	//reject values that mktime would silently turn into another date
	if (month<1 || month>12 || day<1 || day>31) return(NULL);
	Line+=next;
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	Date->tm_year=year-1900;
	Date->tm_mon=month-1;
	Date->tm_mday=day;
	return(Line);
}
272
/*!
Scan through the time in a column. The time is formatted as
hour:minute optionally followed by :second and a fraction of a
second. It may be enclosed in double quotes. The fraction of a second
is skipped.

\param Line The pointer to the beginning of the string.
\param Date The structure whose hour, minute and second are set from
the time. It is left untouched if the function fails.

\return The pointer to the character following the time (after the
closing quote if any) or NULL if the time is invalid.
*/
static char *ExtLog_GetTime(char *Line,struct tm *Date)
{
	bool quote;
	int hour;
	int minute;
	int second=0;
	int hm_end=0;
	int hms_end=0;
	int nfields;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	nfields=sscanf(Line,"%d:%d%n:%d%n",&hour,&minute,&hm_end,&second,&hms_end);
	if (nfields<2) return(NULL);
	if (hour<0 || hour>23 || minute<0 || minute>59) return(NULL);
	if (nfields==3) {
		if (second<0 || second>60) return(NULL);//60 for a leap second
		Line+=hms_end;
		if (*Line=='.') {
			//the fraction of a second is not stored
			Line++;
			while (*Line>='0' && *Line<='9') Line++;
		}
	} else {
		//the seconds are optional
		second=0;
		Line+=hm_end;
	}
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	Date->tm_hour=hour;
	Date->tm_min=minute;
	Date->tm_sec=second;
	return(Line);
}
300
/*!
Scan through a number in a column. The number is a sequence of
decimal digits that may be enclosed in double quotes. A column without
any digit produces the value zero.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number.

\return The pointer to the character following the number (after the
closing quote if any) or NULL if the closing quote is missing or if
the number doesn't fit in a long int.
*/
static char *ExtLog_GetLongInt(char *Line,long int *Value)
{
	bool quote;
	long int number=0;
	int digit;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	*Value=0;
	//isdigit is only defined for values representable as an unsigned char
	while (isdigit((unsigned char)*Line)) {
		digit=*Line++-'0';
		if (number>(LONG_MAX-digit)/10) return(NULL);//the number would overflow
		number=number*10+digit;
	}
	*Value=number;
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
322
/*!
Scan through a big number in a column. The number is a sequence of
decimal digits that may be enclosed in double quotes. A column without
any digit produces the value zero.

\param Line The pointer to the beginning of the string.
\param Value A variable to store the number.

\return The pointer to the character following the number (after the
closing quote if any) or NULL if the closing quote is missing or if
the number doesn't fit in a long long int.
*/
static char *ExtLog_GetLongLongInt(char *Line,long long int *Value)
{
	bool quote;
	long long int number=0;
	int digit;

	//skip opening double quote
	quote=(*Line=='\"');
	if (quote) ++Line;
	*Value=0;
	//isdigit is only defined for values representable as an unsigned char
	while (isdigit((unsigned char)*Line)) {
		digit=*Line++-'0';
		if (number>(LLONG_MAX-digit)/10) return(NULL);//the number would overflow
		number=number*10+digit;
	}
	*Value=number;
	if (quote) {
		if (*Line!='\"') return(NULL);//missing closing quote.
		++Line;
	}
	return(Line);
}
344
/*!
Terminate the \a string in place and remove the doubled quotes it may contain.

\param string The first character of the string, after the opening quote
if there is one. Nothing is done if it is NULL.
\param end_ptr If no quotes are known to be in the string, it is the pointer
to the character following the last character of the string. That character
is replaced by the string terminator. If it is NULL, every pair of double
quotes is replaced by one double quote and the string is terminated at the
first double quote not followed by another one.
*/
static void ExtLog_FixString(char *string,char *end_ptr)
{
	char *dest;

	if (!string) return;//string not parsed
	if (end_ptr) { //end is known and no quotes are in the string
		*end_ptr='\0';
		return;
	}
	// remove the quotes and end at the first unremoveable quote
	for (dest=string ; *string ; string++) {
		if (*string=='\"') {
			if (string[1]!='\"') break; //closing quote
			string++;//skip the first quote
		}
		*dest++=*string;
	}
	*dest='\0';
}
371
372/*!
373Read one entry from an extended log.
374
375\param Line One line from the input log file.
376\param Entry Where to store the information parsed from the line.
377
378\retval RLRC_NoError One valid entry is parsed.
379\retval RLRC_Unknown The line is invalid.
380\retval RLRC_InternalError An internal error was encountered.
381*/
382static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry)
383{
86d99d08
FM
384 int col;
385 enum ext_col_id col_id;
f83d7b44 386 char *Ip=NULL;
86d99d08
FM
387 char *IpEnd;
388 char *UserEnd;
389 char *UrlEnd;
390 char *HttpCodeEnd;
391
392 // is it a directive
393 if (*Line=='#') {
394 enum ReadLogReturnCodeEnum status=ExtLog_Directive(Line);
395 if (status!=RLRC_Unknown) InExtLog=true;
396 return(status);
397 }
398 if (!InExtLog) return(RLRC_Unknown);
399
400 col=0;
401 while (*Line) {
402 if (col>=ExtColNumber) {
af961877 403 debuga(__FILE__,__LINE__,_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col,ExtColNumber);
86d99d08
FM
404 return(RLRC_Unknown);
405 }
406 col_id=ExtLog_WhichColumn(col);
407 switch (col_id)
408 {
409 case EXTCOL_Ip:
f83d7b44 410 Entry->Ip=Ip=Line;
86d99d08
FM
411 Line=ExtLog_GetString(Line,col,&IpEnd);
412 if (!Line) return(RLRC_Unknown);
413 break;
414 case EXTCOL_UserName:
36a0b94c 415 Entry->User=Line;
86d99d08
FM
416 Line=ExtLog_GetString(Line,col,&UserEnd);
417 if (!Line) return(RLRC_Unknown);
418 break;
419 case EXTCOL_Date:
420 Line=ExtLog_GetDate(Line,&Entry->EntryTime);
421 if (!Line) return(RLRC_Unknown);
422 break;
423 case EXTCOL_Time:
424 Line=ExtLog_GetTime(Line,&Entry->EntryTime);
425 if (!Line) return(RLRC_Unknown);
426 break;
427 case EXTCOL_TimeTaken:
428 Line=ExtLog_GetLongInt(Line,&Entry->ElapsedTime);
429 if (!Line) return(RLRC_Unknown);
430 break;
431 case EXTCOL_Bytes:
432 Line=ExtLog_GetLongLongInt(Line,&Entry->DataSize);
433 if (!Line) return(RLRC_Unknown);
434 break;
435 case EXTCOL_Uri:
436 Entry->Url=Line;
437 Line=ExtLog_GetString(Line,col,&UrlEnd);
438 if (!Line) return(RLRC_Unknown);
439 break;
440 case EXTCOL_Status:
441 Entry->HttpCode=Line;
442 Line=ExtLog_GetString(Line,col,&HttpCodeEnd);
443 if (!Line) return(RLRC_Unknown);
444 break;
445 case EXTCOL_Last://ignored column
446 Line=ExtLog_GetString(Line,col,NULL);
447 if (!Line) return(RLRC_Unknown);
448 break;
449 }
450 if (*Line && *Line!=ExtColSep[col]) return(RLRC_Unknown);
451 while (*Line && *Line==ExtColSep[col]) Line++;
452 col++;
453 }
454 if (col!=ExtColNumber) {
af961877 455 debuga(__FILE__,__LINE__,_("Only %d columns in an extended log file format when %d have been announced\n"),col,ExtColNumber);
86d99d08
FM
456 return(RLRC_Unknown);
457 }
458
459 // check the entry time
460 if (mktime(&Entry->EntryTime)==-1) {
af961877 461 debuga(__FILE__,__LINE__,_("Invalid date or time found in the extended log file\n"));
86d99d08
FM
462 return(RLRC_InternalError);
463 }
464
f83d7b44 465 ExtLog_FixString(Ip,IpEnd);
36a0b94c 466 ExtLog_FixString(Entry->User,UserEnd);
86d99d08
FM
467 ExtLog_FixString(Entry->Url,UrlEnd);
468 ExtLog_FixString(Entry->HttpCode,HttpCodeEnd);
469
1c91da07
FM
470 return(RLRC_NoError);
471}
472
473//! \brief Object to read an extended log.
474const struct ReadLogProcessStruct ReadExtLog=
475{
476 /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
477 N_("extended log format"),
478 ExtLog_NewFile,
479 ExtLog_ReadEntry
480};