git.ipfire.org Git - thirdparty/sarg.git/blame - readlog.c
Store the entry time in a structure instead of a pointer
[thirdparty/sarg.git] / readlog.c
CommitLineData
27d1fa35
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
29#include "include/readlog.h"
30
31#define REPORT_EVERY_X_LINES 5000
32#define MAX_OPEN_USER_FILES 10
33
/*!
One node of the singly linked list of per-user temporary output files
maintained by ReadLogFile().

ReadLogFile() moves a node to the head of the list each time an entry is
written for its user and closes the files of nodes far from the head when
too many are open.
*/
struct userfilestruct
{
	//! The next node in the list or NULL at the end of the list.
	struct userfilestruct *next;
	//! The user whose entries are written to this file.
	struct userinfostruct *user;
	//! The open temporary file or NULL if it is currently closed.
	FILE *file;
};
40
41numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
42numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
43
27d1fa35
FM
44extern char *userfile;
45
1c91da07
FM
46extern const struct ReadLogProcessStruct ReadSquidLog;
47extern const struct ReadLogProcessStruct ReadCommonLog;
48extern const struct ReadLogProcessStruct ReadSargLog;
49extern const struct ReadLogProcessStruct ReadExtLog;
50
51//! The list of the supported log formats.
52static const struct ReadLogProcessStruct const *LogFormats[]=
53{
54 &ReadSquidLog,
55 &ReadCommonLog,
56 &ReadSargLog,
57 &ReadExtLog
58};
59
27d1fa35
FM
60/*!
61Read the log files.
62
63\param Filter The filtering parameters for the file to load.
64
65\retval 1 Records found.
66\retval 0 No record found.
67*/
68int ReadLogFile(struct ReadLogDataStruct *Filter)
69{
27d1fa35
FM
70 longline line;
71 char *linebuf;
72 char *str;
27d1fa35 73 char arq_log[255];
1c91da07
FM
74 //char fun[MAXLEN];
75 //char elap[255];
27d1fa35 76 char user[MAX_USER_LEN];
1c91da07
FM
77 //char data[255];
78 //char ip[60];
27d1fa35 79 char hora[30];
1c91da07 80 //char mes[30];
27d1fa35 81 char tbuf2[128];
c5d6ef4b 82 char dia[128]="";
27d1fa35
FM
83 char wuser[MAXLEN];
84 char tmp3[MAXLEN];
85 char sz_Download_Unsort[20000];
86 char start_hour[128];
27d1fa35
FM
87 char download_url[MAXLEN];
88 char smartfilter[MAXLEN];
89 const char *arq;
90 const char *url;
91 int iarq;
92 int blen;
93 int OutputNonZero = REPORT_EVERY_X_LINES ;
94 int idata=0;
1c91da07 95 //int isa_ncols=0,isa_cols[ISACOL_Last];
27d1fa35
FM
96 int x;
97 int hmr;
98 int nopen;
99 int maxopenfiles=MAX_OPEN_USER_FILES;
100 int mindate=0;
101 int maxdate=0;
102 int cstatus;
1c91da07
FM
103 int current_format_idx;
104 int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
27d1fa35
FM
105 unsigned long int recs1=0UL;
106 unsigned long int recs2=0UL;
107 long int totregsl=0;
108 long int totregsg=0;
109 long int totregsx=0;
1c91da07 110 //long long int iyear, imonth, iday;
27d1fa35
FM
111 FILE *fp_in=NULL;
112 FILE *fp_log=NULL;
27d1fa35
FM
113 FILE *fp_Download_Unsort=NULL;
114 bool from_pipe;
115 bool from_stdin;
116 bool download_flag=false;
117 bool id_is_ip;
118 bool totper=false;
1c91da07 119 enum ReadLogReturnCodeEnum log_entry_status;
27d1fa35
FM
120 struct stat logstat;
121 struct getwordstruct gwarea;
1c91da07 122 //struct tm tt;
27d1fa35
FM
123 struct userfilestruct *prev_ufile;
124 struct userinfostruct *uinfo;
125 struct userfilestruct *first_user_file=NULL;
126 struct userfilestruct *ufile;
127 struct userfilestruct *ufile1;
c5d6ef4b 128 struct ReadLogStruct log_entry;
1c91da07
FM
129 const struct ReadLogProcessStruct *current_format=NULL;
130 //time_t tnum;
27d1fa35 131
1c91da07 132 for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
27d1fa35
FM
133 tmp3[0]='\0';
134 start_hour[0]='\0';
135 first_user_file=NULL;
136
137 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
27d1fa35
FM
138
139 if(DataFile[0]=='\0') {
8e53b2e7 140 denied_open();
16b013cc 141 authfail_open();
27d1fa35
FM
142 }
143
144 if ((line=longline_create())==NULL) {
145 debuga(_("Not enough memory to read a log file\n"));
146 exit(EXIT_FAILURE);
147 }
148
149 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
150 arq=AccessLog[iarq];
151
1c91da07
FM
152 current_format=NULL;
153 current_format_idx=-1;
154 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
155 if (LogFormats[x]->NewFile)
156 LogFormats[x]->NewFile(arq);
157
158 if (arq[0]=='-' && arq[1]=='\0') {
27d1fa35
FM
159 if(debug)
160 debuga(_("Reading access log file: from stdin\n"));
161 fp_in=stdin;
162 from_stdin=true;
163 } else {
164 if (Filter->DateRange[0]!='\0') {
165 if (stat(arq,&logstat)!=0) {
166 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
167 } else {
168 struct tm *logtime=localtime(&logstat.st_mtime);
169 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
170 debuga(_("Ignoring old log file %s\n"),arq);
171 continue;
172 }
173 }
174 }
175 fp_in=decomp(arq,&from_pipe);
176 if(fp_in==NULL) {
177 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
178 exit(EXIT_FAILURE);
179 }
180 if(debug) debuga(_("Reading access log file: %s\n"),arq);
181 from_stdin=false;
182 }
183
27d1fa35
FM
184 download_flag=false;
185
2f4787e6
FM
186 recs1=0UL;
187 recs2=0UL;
188
27d1fa35 189 // pre-read the file only if we have to show stats
2f4787e6 190 if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
27d1fa35
FM
191 size_t nread,i;
192 bool skipcr=false;
193 char tmp4[MAXLEN];
194
27d1fa35
FM
195 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
196 for (i=0 ; i<nread ; i++)
197 if (skipcr) {
198 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
199 skipcr=false;
200 }
201 } else {
202 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
203 skipcr=true;
204 recs1++;
205 }
206 }
207 }
208 rewind(fp_in);
209 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
210 putchar('\r');
211 fflush( stdout ) ;
212 }
213
214 longline_reset(line);
215
216 while ((linebuf=longline_read(fp_in,line))!=NULL) {
217 blen=strlen(linebuf);
218
1c91da07 219#if 0
27d1fa35
FM
220 if (ilf==ILF_Unknown) {
221 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
1c91da07
FM
222 if (debug) {
223 fixendofline(linebuf);
27d1fa35 224 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
1c91da07 225 }
27d1fa35
FM
226 ilf=ILF_Isa;
227 ilf_count[ilf]++;
228 continue;
229 }
230
231 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
232 if (getperiod_fromsarglog(arq,&period)<0) {
233 debuga(_("The name of the file is invalid: %s\n"),arq);
234 exit(EXIT_FAILURE);
235 }
236 ilf=ILF_Sarg;
237 ilf_count[ilf]++;
238 continue;
239 }
240 }
1c91da07 241#endif
27d1fa35
FM
242
243 recs2++;
2f4787e6
FM
244 if (ShowReadStatistics && --OutputNonZero<=0) {
245 if (recs1>0) {
246 double perc = recs2 * 100. / recs1 ;
247 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
248 } else {
249 printf(_("SARG: Records in file: %lu"),recs2);
250 }
27d1fa35
FM
251 putchar('\r');
252 fflush (stdout);
253 OutputNonZero = REPORT_EVERY_X_LINES ;
254 }
1c91da07
FM
255
256 /*
257 The following checks are retained here as I don't know to
258 what format they apply. They date back to pre 2.4 versions.
259 */
260 //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
261 if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query
262 if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog
27d1fa35
FM
263 if(linebuf[0] == ' ') continue;
264
265 // exclude_string
266 if(ExcludeString[0] != '\0') {
267 bool exstring=false;
268 getword_start(&gwarea,ExcludeString);
269 while(strchr(gwarea.current,':') != 0) {
270 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
271 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
272 exit(EXIT_FAILURE);
273 }
274 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
275 exstring=true;
276 break;
277 }
278 }
279 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
280 exstring=true;
281 if(exstring) continue;
282 }
283
284 totregsl++;
285 if(debugm)
286 printf("BUF=%s\n",linebuf);
287
1c91da07
FM
288 // process the line
289 log_entry_status=RLRC_Unknown;
6debcf3d 290 memset(&log_entry,0,sizeof(log_entry));
1c91da07
FM
291 if (current_format) {
292 log_entry_status=current_format->ReadEntry(linebuf,&log_entry);
293 }
294
295 // find out what line format to use
296 if (log_entry_status==RLRC_Unknown) {
297 x=-1;
298 while (log_entry_status==RLRC_Unknown && x<(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
299 x++;
300 if (LogFormats[x]==current_format) continue;
301 memset(&log_entry,0,sizeof(log_entry));
302 log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry);
303 }
304 if (x<0 || x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
305 debuga(_("Unknown line format found in input log file %s\n"),arq);
306 exit(EXIT_FAILURE);
307 }
308 current_format=LogFormats[x];
309 current_format_idx=x;
310 if (debugz) {
311 /* TRANSLATORS: The argument is the log format name as translated by you. */
312 debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq);
313 }
314 }
315 if (log_entry_status==RLRC_Ignore) {
316 continue;
317 }
318 if (current_format_idx<0 || current_format==NULL) {
319 debuga(_("Sarg couldn't determine the format of the input log file %s\n"),arq);
320 exit(EXIT_FAILURE);
321 }
322 if (log_entry_status==RLRC_InternalError) {
323 debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq);
324 exit(EXIT_FAILURE);
325 }
326 format_count[current_format_idx]++;
327
328 if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) {
329 if(access(ParsedOutputLog,R_OK) != 0) {
330 my_mkdir(ParsedOutputLog);
331 }
332 if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
333 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
334 exit(EXIT_FAILURE);
335 }
336 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
337 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
338 exit(EXIT_FAILURE);
339 }
340 fputs("*** SARG Log ***\n",fp_log);
341 }
342
343#if 0
27d1fa35
FM
344 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
345 getword_start(&gwarea,linebuf);
346 if (getword(data,sizeof(data),&gwarea,' ')<0) {
347 debuga(_("Maybe you have a broken time in your access.log file\n"));
348 exit(EXIT_FAILURE);
349 }
350 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
351 strcpy(ip,data);
6debcf3d 352 log_entry.Ip=ip;
27d1fa35
FM
353 if(squid24) {
354 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
355 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
356 exit(EXIT_FAILURE);
357 }
358 } else {
359 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
360 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
361 exit(EXIT_FAILURE);
362 }
363 }
525f6fa7 364 log_entry.User=user;
27d1fa35
FM
365 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
366 getword(fun,sizeof(fun),&gwarea,' ')<0) {
367 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
368 exit(EXIT_FAILURE);
369 }
e2379f05 370 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0) {
27d1fa35
FM
371 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
372 exit(EXIT_FAILURE);
373 }
374 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
375 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
376 exit(EXIT_FAILURE);
377 }
378 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
379 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
380 exit(EXIT_FAILURE);
381 }
2c4bc22b 382 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
27d1fa35
FM
383 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
384 exit(EXIT_FAILURE);
385 }
386 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
387 if (getword(code,sizeof(code),&gwarea,' ')<0) {
388 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
389 exit(EXIT_FAILURE);
390 }
391 } else {
392 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
393 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
394 exit(EXIT_FAILURE);
395 }
396 }
397
398 if ((str = strchr(code, ':')) != NULL)
399 *str = '/';
ae412eca 400 log_entry.HttpCode=code;
27d1fa35 401
27d1fa35
FM
402 ilf=ILF_Common;
403 ilf_count[ilf]++;
404
405 getword_start(&gwarea,data+1);
406 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
407 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
408 exit(EXIT_FAILURE);
409 }
410 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
411 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
412 exit(EXIT_FAILURE);
413 }
414 getword_start(&gwarea,data);
415 if (getword_atoll(&iday,&gwarea,'/')<0){
416 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
417 exit(EXIT_FAILURE);
418 }
419 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
420 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
421 exit(EXIT_FAILURE);
422 }
423 if (getword_atoll(&iyear,&gwarea,'/')<0){
424 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
425 exit(EXIT_FAILURE);
426 }
427
428 imonth=month2num(mes)+1;
429 idata=builddia(iday,imonth,iyear);
430 computedate(iyear,imonth,iday,&tt);
431 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
432 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
433 debuga(_("Invalid time found in %s\n"),arq);
434 exit(EXIT_FAILURE);
435 }
c5d6ef4b 436 log_entry.EntryTime=&tt;
27d1fa35
FM
437 }
438
439 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
440 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
441 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
442 exit(EXIT_FAILURE);
443 }
444 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
445 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
446 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
447 exit(EXIT_FAILURE);
448 }
449 if(strlen(elap) < 1) continue;
2c4bc22b 450 log_entry.ElapsedTime=atol(elap);
27d1fa35
FM
451 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
452 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
453 exit(EXIT_FAILURE);
454 }
6debcf3d 455 log_entry.Ip=ip;
27d1fa35
FM
456 if (getword(code,sizeof(code),&gwarea,' ')<0){
457 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
458 exit(EXIT_FAILURE);
459 }
ae412eca 460 log_entry.HttpCode=code;
2c4bc22b 461 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
27d1fa35
FM
462 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
463 exit(EXIT_FAILURE);
464 }
465 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
466 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
467 exit(EXIT_FAILURE);
468 }
e2379f05 469 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0){
27d1fa35
FM
470 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
471 exit(EXIT_FAILURE);
472 }
473 if (getword(user,sizeof(user),&gwarea,' ')<0){
474 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
475 exit(EXIT_FAILURE);
476 }
525f6fa7 477 log_entry.User=user;
27d1fa35
FM
478 ilf=ILF_Squid;
479 ilf_count[ilf]++;
480
481 tnum=atoi(data);
c5d6ef4b
FM
482 log_entry.EntryTime=localtime(&tnum);
483 if (log_entry.EntryTime == NULL) {
27d1fa35
FM
484 debuga(_("Cannot convert the timestamp from the squid log file\n"));
485 exit(EXIT_FAILURE);
486 }
487
c5d6ef4b 488 strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
27d1fa35 489
c5d6ef4b 490 idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
27d1fa35
FM
491 }
492 }
493 if (ilf==ILF_Sarg) {
494 getword_start(&gwarea,linebuf);
495 if (getword(data,sizeof(data),&gwarea,'\t')<0){
496 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
497 exit(EXIT_FAILURE);
498 }
499 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
500 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
501 exit(EXIT_FAILURE);
502 }
503 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
504 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
505 exit(EXIT_FAILURE);
506 }
525f6fa7 507 log_entry.User=user;
27d1fa35
FM
508 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
509 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
510 exit(EXIT_FAILURE);
511 }
6debcf3d 512 log_entry.Ip=ip;
e2379f05 513 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,'\t')<0){
27d1fa35
FM
514 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
515 exit(EXIT_FAILURE);
516 }
2c4bc22b 517 if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
27d1fa35
FM
518 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
519 exit(EXIT_FAILURE);
520 }
521 if (getword(code,sizeof(code),&gwarea,'\t')<0){
522 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
523 exit(EXIT_FAILURE);
524 }
ae412eca 525 log_entry.HttpCode=code;
2c4bc22b 526 if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
27d1fa35
FM
527 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
528 exit(EXIT_FAILURE);
529 }
530 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
531 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
532 exit(EXIT_FAILURE);
533 }
534 getword_start(&gwarea,data);
535 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
536 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
537 exit(EXIT_FAILURE);
538 }
539 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
540 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
541 exit(EXIT_FAILURE);
542 }
543 if (getword_atoll(&iyear,&gwarea,'\0')<0){
544 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
545 exit(EXIT_FAILURE);
546 }
547 idata=builddia(iday,imonth,iyear);
548 computedate(iyear,imonth,iday,&tt);
549 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
550 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
551 debuga(_("Invalid time found in %s\n"),arq);
552 exit(EXIT_FAILURE);
553 }
c5d6ef4b 554 log_entry.EntryTime=&tt;
27d1fa35
FM
555 }
556 if (ilf==ILF_Isa) {
557 if (linebuf[0] == '#') {
558 int ncols,cols[ISACOL_Last];
559
560 fixendofline(linebuf);
561 getword_start(&gwarea,linebuf);
562 // remove the #Fields: column at the beginning of the line
563 if (getword_skip(1000,&gwarea,' ')<0){
564 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
565 exit(EXIT_FAILURE);
566 }
567 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
568 ncols=0;
569 while(gwarea.current[0] != '\0') {
570 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
571 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
572 exit(EXIT_FAILURE);
573 }
574 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
575 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
576 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
577 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
578 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
579 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
580 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
581 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
582 ncols++;
583 }
584 if (cols[ISACOL_Ip]>=0) {
585 isa_ncols=ncols;
586 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
587 isa_cols[ncols]=cols[ncols];
588 }
589 continue;
590 }
591 if (!isa_ncols) continue;
592 getword_start(&gwarea,linebuf);
593 for (x=0 ; x<isa_ncols ; x++) {
594 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
595 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
596 exit(EXIT_FAILURE);
597 }
598 if (x==isa_cols[ISACOL_Ip]) {
599 if (strlen(str)>=sizeof(ip)) {
600 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
601 exit(EXIT_FAILURE);
602 }
603 strcpy(ip,str);
6debcf3d 604 log_entry.Ip=ip;
27d1fa35
FM
605 } else if (x==isa_cols[ISACOL_UserName]) {
606 if (strlen(str)>=sizeof(user)) {
607 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
608 exit(EXIT_FAILURE);
609 }
610 strcpy(user,str);
525f6fa7 611 log_entry.User=user;
27d1fa35
FM
612 } else if (x==isa_cols[ISACOL_Date]) {
613 if (strlen(str)>=sizeof(data)) {
614 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
615 exit(EXIT_FAILURE);
616 }
617 strcpy(data,str);
618 } else if (x==isa_cols[ISACOL_Time]) {
619 if (strlen(str)>=sizeof(hora)) {
620 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
621 exit(EXIT_FAILURE);
622 }
623 strcpy(hora,str);
624 } else if (x==isa_cols[ISACOL_TimeTaken]) {
2c4bc22b 625 log_entry.ElapsedTime=atol(str);
27d1fa35 626 } else if (x==isa_cols[ISACOL_Bytes]) {
2c4bc22b 627 log_entry.DataSize=atoll(str);
27d1fa35 628 } else if (x==isa_cols[ISACOL_Uri]) {
e2379f05 629 log_entry.Url=str;
27d1fa35
FM
630 } else if (x==isa_cols[ISACOL_Status]) {
631 if (strlen(str)>=sizeof(code)) {
632 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
633 exit(EXIT_FAILURE);
634 }
635 strcpy(code,str);
636 }
637 }
638
639 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
ae412eca
FM
640 static char valcode[12];
641 sprintf(valcode,"DENIED/%s",code);
642 log_entry.HttpCode=valcode;
643 } else {
644 log_entry.HttpCode=code;
27d1fa35
FM
645 }
646 getword_start(&gwarea,data);
647 if (getword_atoll(&iyear,&gwarea,'-')<0){
648 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
649 exit(EXIT_FAILURE);
650 }
651 if (getword_atoll(&imonth,&gwarea,'-')<0){
652 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
653 exit(EXIT_FAILURE);
654 }
655 if (getword_atoll(&iday,&gwarea,'\0')<0){
656 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
657 exit(EXIT_FAILURE);
658 }
659
660 idata=builddia(iday,imonth,iyear);
661 computedate(iyear,imonth,iday,&tt);
662 if (isa_cols[ISACOL_Time]>=0) {
663 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
664 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
665 debuga(_("Invalid time found in %s\n"),arq);
666 exit(EXIT_FAILURE);
667 }
668 }
c5d6ef4b 669 log_entry.EntryTime=&tt;
27d1fa35 670 }
1c91da07 671#endif
6debcf3d
FM
672 if (log_entry.Ip==NULL) {
673 debuga(_("Unknown input log file format: no IP addresses\n"));
27d1fa35
FM
674 break;
675 }
525f6fa7
FM
676 if (log_entry.User==NULL) {
677 debuga(_("Unknown input log file format: no user\n"));
678 break;
679 }
e2379f05
FM
680 if (log_entry.Url==NULL) {
681 debuga(_("Unknown input log file format: no URL\n"));
682 break;
683 }
27d1fa35 684
27d1fa35
FM
685 if(debugm)
686 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
687
688 if(Filter->DateRange[0] != '\0'){
689 if(idata < dfrom || idata > duntil) continue;
690 }
691
692 // Record only hours usage which is required
cb53374b 693 if( bsearch( &( log_entry.EntryTime.tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
c5d6ef4b 694 continue;
27d1fa35 695
cb53374b 696 if( bsearch( &( log_entry.EntryTime.tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
c5d6ef4b 697 continue;
27d1fa35
FM
698
699
525f6fa7
FM
700 if(strlen(log_entry.User) > MAX_USER_LEN) {
701 if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
27d1fa35
FM
702 totregsx++;
703 continue;
704 }
705
706 // include_users
707 if(IncludeUsers[0] != '\0') {
525f6fa7 708 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
27d1fa35
FM
709 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
710 continue;
711 }
712
ae412eca
FM
713 if(vercode(log_entry.HttpCode)) {
714 if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
27d1fa35
FM
715 totregsx++;
716 continue;
717 }
718
525f6fa7 719 if(testvaliduserchar(log_entry.User))
27d1fa35
FM
720 continue;
721
27d1fa35 722 // replace any tab by a single space
e2379f05 723 for (str=log_entry.Url ; *str ; str++)
27d1fa35 724 if (*str=='\t') *str=' ';
ae412eca 725 for (str=log_entry.HttpCode ; *str ; str++)
27d1fa35
FM
726 if (*str=='\t') *str=' ';
727
1c91da07 728 if (current_format!=&ReadSargLog) {
27d1fa35
FM
729 /*
730 The full URL is not saved in sarg log. There is no point in testing the URL to detect
731 a downloaded file.
732 */
e2379f05 733 download_flag=is_download_suffix(log_entry.Url);
27d1fa35 734 if (download_flag) {
e2379f05 735 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
27d1fa35
FM
736 download_count++;
737 }
738 } else
739 download_flag=false;
740
e2379f05 741 url=process_url(log_entry.Url,LongUrl);
27d1fa35
FM
742 if (!url || url[0] == '\0') continue;
743
744 if(addr[0] != '\0'){
6debcf3d 745 if(strcmp(addr,log_entry.Ip)!=0) continue;
27d1fa35
FM
746 }
747 if(Filter->HostFilter) {
748 if(!vhexclude(url)) {
749 if (debugm) printf(_("Excluded site: %s\n"),url);
750 totregsx++;
751 continue;
752 }
753 }
754
755 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
cb53374b 756 hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
27d1fa35
FM
757 if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
758 }
759
760 if(site[0] != '\0'){
761 if(strstr(url,site)==0) continue;
762 }
763
764 if(UserIp) {
525f6fa7 765 log_entry.User=log_entry.Ip;
27d1fa35
FM
766 id_is_ip=true;
767 } else {
768 id_is_ip=false;
81a022d8 769 if ((log_entry.User[0]=='\0') || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || log_entry.User[0]==' '))) {
27d1fa35 770 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
525f6fa7 771 log_entry.User=log_entry.Ip;
27d1fa35
FM
772 id_is_ip=true;
773 }
774 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
775 continue;
776 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
525f6fa7 777 log_entry.User="everybody";
27d1fa35 778 } else {
525f6fa7 779 strlow(log_entry.User);
27d1fa35 780 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
6b3052a2 781 if ((str=strchr(user,'+'))!=NULL || (str=strchr(user,'\\'))!=NULL || (str=strchr(user,'_'))!=NULL) {
525f6fa7 782 log_entry.User=str+1;
27d1fa35
FM
783 }
784 }
785 }
786 }
787
788 if(us[0] != '\0'){
525f6fa7 789 if(strcmp(log_entry.User,us)!=0) continue;
27d1fa35
FM
790 }
791
792 if(Filter->SysUsers) {
525f6fa7 793 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
27d1fa35
FM
794 if(strstr(userfile, wuser) == 0)
795 continue;
796 }
797
798 if(Filter->UserFilter) {
525f6fa7
FM
799 if(!vuexclude(log_entry.User)) {
800 if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
27d1fa35
FM
801 totregsx++;
802 continue;
803 }
804 }
805
1c91da07
FM
806 if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
807 log_entry.User[0]==' ' || log_entry.User[0]==':')))
27d1fa35
FM
808 continue;
809
2c4bc22b 810 if (log_entry.DataSize<0) log_entry.DataSize=0;
27d1fa35 811
2c4bc22b
FM
812 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
813 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
814 log_entry.ElapsedTime=0;
27d1fa35
FM
815 }
816
817 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
818 fixendofline(str);
819 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
820 } else strcpy(smartfilter,"\"\"");
821
822 nopen=0;
823 prev_ufile=NULL;
525f6fa7 824 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
27d1fa35
FM
825 prev_ufile=ufile;
826 if (ufile->file) nopen++;
827 }
828 if (!ufile) {
829 ufile=malloc(sizeof(*ufile));
830 if (!ufile) {
525f6fa7 831 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
27d1fa35
FM
832 exit(EXIT_FAILURE);
833 }
834 memset(ufile,0,sizeof(*ufile));
835 ufile->next=first_user_file;
836 first_user_file=ufile;
525f6fa7 837 uinfo=userinfo_create(log_entry.User);
27d1fa35
FM
838 ufile->user=uinfo;
839 uinfo->id_is_ip=id_is_ip;
840 } else {
841 if (prev_ufile) {
842 prev_ufile->next=ufile->next;
843 ufile->next=first_user_file;
844 first_user_file=ufile;
845 }
846 }
1c91da07 847#ifdef ENABLE_DOUBLE_CHECK_DATA
2c4bc22b
FM
848 ufile->user->nbytes+=log_entry.DataSize;
849 ufile->user->elap+=log_entry.ElapsedTime;
1c91da07 850#endif
27d1fa35
FM
851
852 if (ufile->file==NULL) {
853 if (nopen>=maxopenfiles) {
854 x=0;
855 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
856 if (ufile1->file!=NULL) {
857 if (x>=maxopenfiles) {
858 if (fclose(ufile1->file)==EOF) {
859 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
860 exit(EXIT_FAILURE);
861 }
862 ufile1->file=NULL;
863 }
864 x++;
865 }
866 }
867 }
b378aaf1
FM
868 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
869 debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
27d1fa35
FM
870 exit(EXIT_FAILURE);
871 }
872 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
873 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
874 exit (1);
875 }
876 }
877
cb53374b
FM
878 strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
879 strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
c5d6ef4b 880
1c91da07
FM
881 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
882 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
883 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
525f6fa7 884 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
27d1fa35
FM
885 exit(EXIT_FAILURE);
886 }
887
1c91da07
FM
888 if (fp_log && current_format!=&ReadSargLog) {
889 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
890 log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
891 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
892 }
27d1fa35
FM
893
894 totregsg++;
895
ae412eca 896 if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
27d1fa35
FM
897 ndownload = 1;
898
899 if ( ! fp_Download_Unsort ) {
900 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
901 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
902 exit (1);
903 }
904 }
1c91da07
FM
905 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,
906 log_entry.User,log_entry.Ip,download_url);
27d1fa35
FM
907 }
908
8e53b2e7 909 denied_write(&log_entry);
16b013cc 910 authfail_write(&log_entry);
27d1fa35 911
1c91da07 912 if (current_format!=&ReadSargLog) {
27d1fa35
FM
913 if(!totper || idata<mindate){
914 mindate=idata;
cb53374b 915 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
916 strcpy(start_hour,tbuf2);
917 }
918 if (!totper || idata>maxdate) {
919 maxdate=idata;
cb53374b 920 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
27d1fa35
FM
921 }
922 totper=true;
923 }
924
925 if(debugm){
6debcf3d 926 printf("IP=\t%s\n",log_entry.Ip);
525f6fa7 927 printf("USER=\t%s\n",log_entry.User);
2c4bc22b 928 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
27d1fa35
FM
929 printf("DATE=\t%s\n",dia);
930 printf("TIME=\t%s\n",hora);
1c91da07 931 //printf("FUNC=\t%s\n",fun);
27d1fa35 932 printf("URL=\t%s\n",url);
ae412eca 933 printf("CODE=\t%s\n",log_entry.HttpCode);
2c4bc22b 934 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
27d1fa35
FM
935 }
936 }
937
938 if (!from_stdin) {
939 if (from_pipe)
940 pclose(fp_in);
941 else {
942 fclose(fp_in);
2f4787e6
FM
943 if (ShowReadStatistics) {
944 if (ShowReadPercent)
945 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
946 else
947 printf(_("SARG: Records in file: %lu\n"),recs2);
948 }
27d1fa35
FM
949 }
950 }
951 }
952 longline_destroy(&line);
953
954 if(fp_log != NULL) {
955 char end_hour[128];
956 char val2[40];
957 char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
958
959 fclose(fp_log);
960 safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
961 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
962 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
963 if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
964 debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
965 exit(EXIT_FAILURE);
966 }
967 if (rename(arq_log,val4)) {
968 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
969 } else {
970 strcpy(arq_log,val4);
971
972 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
973 /*
974 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
975 necessary around the command name, put them in the configuration file.
976 */
977 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
978 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
979 exit(EXIT_FAILURE);
980 }
981 cstatus=system(val1);
982 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
983 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
984 debuga(_("command: %s\n"),val1);
985 exit(EXIT_FAILURE);
986 }
987 }
988 }
989 if(debug)
990 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
991 }
992
8e53b2e7 993 denied_close();
16b013cc 994 authfail_close();
27d1fa35
FM
995 if (fp_Download_Unsort) fclose (fp_Download_Unsort);
996
997 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
998 ufile1=ufile->next;
999 if (ufile->file!=NULL) fclose(ufile->file);
1000 free(ufile);
1001 }
1002
1003 if (debug) {
1004 int totalcount=0;
1005
1006 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1007
1c91da07
FM
1008 for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
1009 if (format_count[x]>0) {
1010 /* TRANSLATORS: It displays the number of lines found in the input log files
1011 * for each supported log format. The log format name is the %s and is a string
1012 * you translate somewhere else. */
1013 debuga(_("%s: %d entries\n"),_(LogFormats[x]->Name),format_count[x]);
1014 totalcount+=format_count[x];
1015 }
1016 }
27d1fa35 1017
1c91da07 1018 if (totalcount==0 && totregsg)
27d1fa35
FM
1019 debuga(_("Log with invalid format\n"));
1020 }
1021
1c91da07 1022 if(debugz) {
27d1fa35
FM
1023 debugaz(_("date=%s\n"),dia);
1024 debugaz(_("period=%s\n"),period.text);
1025 }
1026
27d1fa35
FM
1027 return((totregsg!=0) ? 1 : 0);
1028}