]>
Commit | Line | Data |
---|---|---|
1c91da07 FM |
1 | /* |
2 | * SARG Squid Analysis Report Generator http://sarg.sourceforge.net | |
110ce984 | 3 | * 1998, 2015 |
1c91da07 FM |
4 | * |
5 | * SARG donations: | |
6 | * please look at http://sarg.sourceforge.net/donations.php | |
7 | * Support: | |
8 | * http://sourceforge.net/projects/sarg/forums/forum/363374 | |
9 | * --------------------------------------------------------------------- | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
24 | * | |
25 | */ | |
26 | ||
27 | #include "include/conf.h" | |
28 | #include "include/defs.h" | |
f83d7b44 | 29 | #include "include/readlog.h" |
1c91da07 | 30 | |
86d99d08 FM |
/*!
Upper bound on the number of columns an extended log may declare.

The figure is arbitrary. It only exists to give the per-column
separator table a fixed size.
*/
#define MAX_EXT_COLUMNS 250

//! Identifiers of the columns this reader extracts from a log line.
enum ext_col_id {
	EXTCOL_Ip,        //!< "c-ip" or "c-dns" field.
	EXTCOL_UserName,  //!< "cs-username" field.
	EXTCOL_Date,      //!< "date" field.
	EXTCOL_Time,      //!< "time" field.
	EXTCOL_TimeTaken, //!< "time-taken" field.
	EXTCOL_Bytes,     //!< "sc-bytes" field.
	EXTCOL_Uri,       //!< "cs-uri" field.
	EXTCOL_Status,    //!< "sc-status" field.
	EXTCOL_Last //last entry of the list !
};

//! \c True once a directive proved the file is an extended log.
static bool InExtLog=false;
//! For each column identifier, its position in the log line or -1 if absent.
static int ExtCols[EXTCOL_Last];
//! For each column position, the separator character that follows it.
static char ExtColSep[MAX_EXT_COLUMNS];
//! How many columns the "fields" directive announced.
static int ExtColNumber;
59 | ||
1c91da07 FM |
60 | /*! |
61 | A new file is being read. The name of the file is \a FileName. | |
62 | */ | |
63 | static void ExtLog_NewFile(const char *FileName) | |
64 | { | |
86d99d08 FM |
65 | InExtLog=false; |
66 | ExtColNumber=0; | |
67 | } | |
68 | ||
69 | /*! | |
70 | Parse the "Fields" directive listing the columns in the log. The | |
71 | \a columns is a pointer to the first column of the directive. | |
72 | ||
73 | \return \c True if the fields is valid or false if it could not | |
74 | be decoded. | |
75 | */ | |
76 | static bool ExtLog_Fields(const char *columns) | |
77 | { | |
78 | int col; | |
79 | int len; | |
80 | int prefix; | |
81 | int header_start; | |
82 | int header_end; | |
83 | int i; | |
84 | enum ext_col_id col_id; | |
85 | char col_sep; | |
86 | // see http://www.w3.org/TR/WD-logfile.html for the list of prefixes | |
87 | const char const *prefixes[]= | |
88 | { | |
89 | "c", | |
90 | "s", | |
91 | "r", | |
92 | "cs", | |
93 | "sc", | |
94 | "sr", | |
95 | "rs", | |
96 | "x", | |
97 | }; | |
98 | ||
99 | for (i=0 ; i<EXTCOL_Last ; i++) ExtCols[i]=-1; | |
100 | ||
101 | col=0; | |
102 | while (*columns) { | |
103 | if (col>=MAX_EXT_COLUMNS) { | |
104 | debuga(_("Too many columns found in an extended log format. The maximum allowed is %d but it can be changed if a bigger value is legitimate\n"),MAX_EXT_COLUMNS); | |
105 | exit(EXIT_FAILURE); | |
106 | } | |
107 | prefix=-1; | |
108 | header_start=-1; | |
109 | header_end=-1; | |
110 | for (i=sizeof(prefixes)/sizeof(*prefixes)-1 ; i>=0 ; i--) { | |
111 | len=strlen(prefixes[i]); | |
112 | if (strncasecmp(columns,prefixes[i],len)==0) { | |
113 | if (columns[len]=='-') { | |
114 | prefix=len++; | |
115 | break; | |
116 | } else if (columns[len]=='(') { | |
117 | header_start=len++; | |
118 | break; | |
119 | } | |
120 | } | |
121 | } | |
40cd345e | 122 | (void)prefix;//compiler pacifier |
86d99d08 FM |
123 | if (i<0) len=0; |
124 | for ( ; (unsigned char)columns[len]>' ' ; len++) {//skip a word and accept any separator (tab or space) | |
125 | if (header_start>=0 && columns[len]==')') header_end=len; | |
126 | } | |
40cd345e | 127 | (void)header_end;//compiler pacifier |
86d99d08 FM |
128 | col_sep=columns[len]; |
129 | ExtColSep[col]=col_sep; | |
130 | ||
131 | // see http://www.w3.org/TR/WD-logfile.html for list of possible identifiers | |
132 | col_id=EXTCOL_Last; | |
133 | if (len==4) { | |
134 | if (strncasecmp(columns,"c-ip",len)==0 && ExtCols[EXTCOL_Ip]<0) col_id=EXTCOL_Ip; | |
135 | else if (strncasecmp(columns,"date",len)==0) col_id=EXTCOL_Date; | |
136 | else if (strncasecmp(columns,"time",len)==0) col_id=EXTCOL_Time; | |
137 | } else if (len==5) { | |
138 | if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip; | |
139 | } else if (len==6) { | |
140 | if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri; | |
141 | } else if (len==8) { | |
142 | if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes; | |
143 | } else if (len==9) { | |
144 | if (strncasecmp(columns,"sc-status",len)==0) col_id=EXTCOL_Status; | |
145 | } else if (len==10) { | |
146 | if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken; | |
147 | } else if (len==11) { | |
148 | if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName; | |
149 | } | |
150 | if (col_id!=EXTCOL_Last) { | |
151 | ExtCols[col_id]=col; | |
152 | } | |
153 | ||
154 | col++; | |
155 | columns+=len; | |
156 | while (*columns && (unsigned char)*columns<=' ') { | |
157 | if (*columns!=col_sep) { | |
158 | debuga(_("Multiple column separators found between two columns in the \"fields\" directive of an extended log format\n")); | |
159 | exit(EXIT_FAILURE); | |
160 | } | |
161 | columns++; | |
162 | } | |
163 | } | |
164 | ExtColNumber=col; | |
165 | return(true); | |
166 | } | |
167 | ||
168 | /*! | |
169 | Decode a directive field from the \a Line. | |
170 | ||
171 | \return RLRC_Ignore if the line is a directive or RLRC_Unknown | |
172 | if the line is not a known directive. | |
173 | */ | |
174 | static enum ReadLogReturnCodeEnum ExtLog_Directive(const char *Line) | |
175 | { | |
176 | ++Line; | |
177 | if (strncasecmp(Line,"Version:",8)==0) return(RLRC_Ignore); | |
178 | if (strncasecmp(Line,"Software:",9)==0) return(RLRC_Ignore); | |
179 | if (strncasecmp(Line,"Start-Date:",11)==0) return(RLRC_Ignore); | |
180 | if (strncasecmp(Line,"End-Date:",9)==0) return(RLRC_Ignore); | |
181 | if (strncasecmp(Line,"Date:",5)==0) return(RLRC_Ignore); | |
182 | if (strncasecmp(Line,"Remark:",7)==0) return(RLRC_Ignore); | |
183 | if (strncasecmp(Line,"Fields:",7)==0) { | |
184 | Line+=7; | |
185 | while (*Line==' ' || *Line=='\t') Line++; | |
186 | if (!ExtLog_Fields(Line)) return(RLRC_Unknown); | |
187 | return(RLRC_Ignore); | |
188 | } | |
189 | return(RLRC_Unknown); | |
190 | } | |
191 | ||
192 | /*! | |
193 | Get the type of the column \a col_num. | |
194 | ||
195 | \return The type of the column or EXTCOL_Last if | |
196 | the column must be ignored. | |
197 | */ | |
198 | static enum ext_col_id ExtLog_WhichColumn(int col_num) | |
199 | { | |
200 | int i; | |
201 | ||
202 | for (i=0 ; i<EXTCOL_Last && ExtCols[i]!=col_num ; i++); | |
203 | return(i); | |
204 | } | |
205 | ||
206 | /*! | |
207 | Scan through the string of a column. | |
208 | ||
209 | \param Line The pointer to the beginning of the string. | |
210 | \param col The column number. | |
211 | */ | |
212 | static char *ExtLog_GetString(char *Line,int col,char **End) | |
213 | { | |
214 | bool quote; | |
215 | bool dequote; | |
216 | ||
217 | //skip opening double quote | |
218 | quote=(*Line=='\"'); | |
219 | if (quote) ++Line; | |
220 | ||
221 | dequote=false; | |
222 | while (*Line) { | |
223 | if (quote) { | |
224 | if (*Line=='\"') { | |
225 | if (Line[1]!='\"') { | |
226 | if (End) *End=(dequote) ? NULL : Line; | |
227 | Line++;//skip the closing quote | |
228 | quote=false; | |
229 | break; | |
230 | } | |
231 | dequote=true; | |
232 | } | |
233 | } else { | |
234 | if (*Line==ExtColSep[col]) { | |
235 | if (End) *End=Line; | |
236 | break; | |
237 | } | |
238 | } | |
239 | Line++; | |
240 | } | |
241 | if (quote) return(NULL);//missing closing quote. | |
242 | return(Line); | |
243 | } | |
244 | ||
/*!
Decode a column containing a date written as year-month-day. The
date may be enclosed in double quotes.

\param Line The pointer to the beginning of the column.
\param Date The structure whose tm_year, tm_mon and tm_mday are
filled with the date. It is left untouched if the decoding fails.

\return The pointer to the first character after the date or NULL
if the three numbers can't be read or if the closing quote is missing.
*/
static char *ExtLog_GetDate(char *Line,struct tm *Date)
{
	int y;
	int m;
	int d;
	int consumed;
	const bool quoted=(Line[0]=='\"');
	char *cursor=(quoted) ? Line+1 : Line;

	if (sscanf(cursor,"%d-%d-%d%n",&y,&m,&d,&consumed)!=3) return(NULL);
	cursor+=consumed;

	if (quoted) {
		//the closing quote is mandatory
		if (*cursor!='\"') return(NULL);
		cursor++;
	}

	// struct tm counts the years from 1900 and the months from zero
	Date->tm_mday=d;
	Date->tm_mon=m-1;
	Date->tm_year=y-1900;
	return(cursor);
}
272 | ||
/*!
Decode a column containing a time written as hour:minute:second. The
time may be enclosed in double quotes.

\param Line The pointer to the beginning of the column.
\param Date The structure whose tm_hour, tm_min and tm_sec are
filled with the time. It is left untouched if the decoding fails.

\return The pointer to the first character after the time or NULL
if the three numbers can't be read or if the closing quote is missing.
*/
static char *ExtLog_GetTime(char *Line,struct tm *Date)
{
	int h;
	int m;
	int s;
	int consumed;
	const bool quoted=(Line[0]=='\"');
	char *cursor=(quoted) ? Line+1 : Line;

	if (sscanf(cursor,"%d:%d:%d%n",&h,&m,&s,&consumed)!=3) return(NULL);
	cursor+=consumed;

	if (quoted) {
		//the closing quote is mandatory
		if (*cursor!='\"') return(NULL);
		cursor++;
	}

	Date->tm_sec=s;
	Date->tm_min=m;
	Date->tm_hour=h;
	return(cursor);
}
300 | ||
301 | /*! | |
302 | Scan through a number in a column. | |
303 | ||
304 | \param Line The pointer to the beginning of the string. | |
305 | \param Value A variable to store the number. | |
306 | */ | |
307 | static char *ExtLog_GetLongInt(char *Line,long int *Value) | |
308 | { | |
309 | bool quote; | |
310 | ||
311 | //skip opening double quote | |
312 | quote=(*Line=='\"'); | |
313 | if (quote) ++Line; | |
314 | *Value=0; | |
315 | while (isdigit(*Line)) *Value=*Value*10+(*Line++-'0'); | |
316 | if (quote) { | |
317 | if (*Line!='\"') return(NULL);//missing closing quote. | |
318 | ++Line; | |
319 | } | |
320 | return(Line); | |
321 | } | |
322 | ||
323 | /*! | |
324 | Scan through a number in a column. | |
325 | ||
326 | \param Line The pointer to the beginning of the string. | |
327 | \param Value A variable to store the number. | |
328 | */ | |
329 | static char *ExtLog_GetLongLongInt(char *Line,long long int *Value) | |
330 | { | |
331 | bool quote; | |
332 | ||
333 | //skip opening double quote | |
334 | quote=(*Line=='\"'); | |
335 | if (quote) ++Line; | |
336 | *Value=0; | |
337 | while (isdigit(*Line)) *Value=*Value*10+(*Line++-'0'); | |
338 | if (quote) { | |
339 | if (*Line!='\"') return(NULL);//missing closing quote. | |
340 | ++Line; | |
341 | } | |
342 | return(Line); | |
343 | } | |
344 | ||
/*!
Terminate the \a string of a column in place.

If \a end_ptr is not NULL, the end of the string is known and a
terminating zero is simply written there. Otherwise, the string is
rewritten in place: every pair of consecutive double quotes is
replaced by one double quote and the string is cut at the first
double quote that is not followed by another one.

\param string The beginning of the string or NULL if the column was
not parsed, in which case nothing is done.
\param end_ptr Where to write the terminating zero or NULL if the
string must be searched for doubled quotes.
*/
static void ExtLog_FixString(char *string,char *end_ptr)
{
	const char *src;
	char *out;

	if (string==NULL) return;//string not parsed

	if (end_ptr!=NULL) {
		//the end is known and there is nothing to remove from the string
		*end_ptr='\0';
		return;
	}

	// collapse the doubled quotes and stop at the first lone quote
	out=string;
	for (src=string ; *src!='\0' ; src++) {
		if (*src=='\"') {
			if (src[1]!='\"') break;//closing quote
			src++;//keep only the second quote of the pair
		}
		*out++=*src;
	}
	*out='\0';
}
371 | ||
372 | /*! | |
373 | Read one entry from an extended log. | |
374 | ||
375 | \param Line One line from the input log file. | |
376 | \param Entry Where to store the information parsed from the line. | |
377 | ||
378 | \retval RLRC_NoError One valid entry is parsed. | |
379 | \retval RLRC_Unknown The line is invalid. | |
380 | \retval RLRC_InternalError An internal error was encountered. | |
381 | */ | |
382 | static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry) | |
383 | { | |
86d99d08 FM |
384 | int col; |
385 | enum ext_col_id col_id; | |
f83d7b44 | 386 | char *Ip=NULL; |
86d99d08 | 387 | char *IpEnd; |
f83d7b44 | 388 | char *User=NULL; |
86d99d08 FM |
389 | char *UserEnd; |
390 | char *UrlEnd; | |
391 | char *HttpCodeEnd; | |
392 | ||
393 | // is it a directive | |
394 | if (*Line=='#') { | |
395 | enum ReadLogReturnCodeEnum status=ExtLog_Directive(Line); | |
396 | if (status!=RLRC_Unknown) InExtLog=true; | |
397 | return(status); | |
398 | } | |
399 | if (!InExtLog) return(RLRC_Unknown); | |
400 | ||
401 | col=0; | |
402 | while (*Line) { | |
403 | if (col>=ExtColNumber) { | |
404 | debuga(_("Too many columns in an extended log file format: %d columns found when %d have been announced\n"),col,ExtColNumber); | |
405 | return(RLRC_Unknown); | |
406 | } | |
407 | col_id=ExtLog_WhichColumn(col); | |
408 | switch (col_id) | |
409 | { | |
410 | case EXTCOL_Ip: | |
f83d7b44 | 411 | Entry->Ip=Ip=Line; |
86d99d08 FM |
412 | Line=ExtLog_GetString(Line,col,&IpEnd); |
413 | if (!Line) return(RLRC_Unknown); | |
414 | break; | |
415 | case EXTCOL_UserName: | |
f83d7b44 | 416 | Entry->User=User=Line; |
86d99d08 FM |
417 | Line=ExtLog_GetString(Line,col,&UserEnd); |
418 | if (!Line) return(RLRC_Unknown); | |
419 | break; | |
420 | case EXTCOL_Date: | |
421 | Line=ExtLog_GetDate(Line,&Entry->EntryTime); | |
422 | if (!Line) return(RLRC_Unknown); | |
423 | break; | |
424 | case EXTCOL_Time: | |
425 | Line=ExtLog_GetTime(Line,&Entry->EntryTime); | |
426 | if (!Line) return(RLRC_Unknown); | |
427 | break; | |
428 | case EXTCOL_TimeTaken: | |
429 | Line=ExtLog_GetLongInt(Line,&Entry->ElapsedTime); | |
430 | if (!Line) return(RLRC_Unknown); | |
431 | break; | |
432 | case EXTCOL_Bytes: | |
433 | Line=ExtLog_GetLongLongInt(Line,&Entry->DataSize); | |
434 | if (!Line) return(RLRC_Unknown); | |
435 | break; | |
436 | case EXTCOL_Uri: | |
437 | Entry->Url=Line; | |
438 | Line=ExtLog_GetString(Line,col,&UrlEnd); | |
439 | if (!Line) return(RLRC_Unknown); | |
440 | break; | |
441 | case EXTCOL_Status: | |
442 | Entry->HttpCode=Line; | |
443 | Line=ExtLog_GetString(Line,col,&HttpCodeEnd); | |
444 | if (!Line) return(RLRC_Unknown); | |
445 | break; | |
446 | case EXTCOL_Last://ignored column | |
447 | Line=ExtLog_GetString(Line,col,NULL); | |
448 | if (!Line) return(RLRC_Unknown); | |
449 | break; | |
450 | } | |
451 | if (*Line && *Line!=ExtColSep[col]) return(RLRC_Unknown); | |
452 | while (*Line && *Line==ExtColSep[col]) Line++; | |
453 | col++; | |
454 | } | |
455 | if (col!=ExtColNumber) { | |
456 | debuga(_("Only %d columns in an extended log file format when %d have been announced\n"),col,ExtColNumber); | |
457 | return(RLRC_Unknown); | |
458 | } | |
459 | ||
460 | // check the entry time | |
461 | if (mktime(&Entry->EntryTime)==-1) { | |
462 | debuga(_("Invalid date or time found in the extended log file\n")); | |
463 | return(RLRC_InternalError); | |
464 | } | |
465 | ||
f83d7b44 FM |
466 | ExtLog_FixString(Ip,IpEnd); |
467 | ExtLog_FixString(User,UserEnd); | |
86d99d08 FM |
468 | ExtLog_FixString(Entry->Url,UrlEnd); |
469 | ExtLog_FixString(Entry->HttpCode,HttpCodeEnd); | |
470 | ||
1c91da07 FM |
471 | return(RLRC_NoError); |
472 | } | |
473 | ||
474 | //! \brief Object to read an extended log. | |
475 | const struct ReadLogProcessStruct ReadExtLog= | |
476 | { | |
477 | /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */ | |
478 | N_("extended log format"), | |
479 | ExtLog_NewFile, | |
480 | ExtLog_ReadEntry | |
481 | }; |