]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog.c
Don't show the input log reading percentage
[thirdparty/sarg.git] / readlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30
/* Number of input lines read between two refreshes of the progress message. */
#define REPORT_EVERY_X_LINES 5000
/* Default ceiling on the number of per-user temporary files kept open at once. */
#define MAX_OPEN_USER_FILES 10
33
/*
 * Node of the singly linked list of per-user output files maintained by
 * ReadLogFile().
 */
struct userfilestruct
{
	/* Next node of the list, NULL on the last node. */
	struct userfilestruct *next;
	/* User this node belongs to (as returned by userinfo_create()). */
	struct userinfostruct *user;
	/* Stream of the user's temporary file, NULL while the file is closed. */
	FILE *file;
};
40
41 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
42 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
43
44 extern char *userfile;
45
46 /*!
47 Read the log files.
48
49 \param Filter The filtering parameters for the file to load.
50
51 \retval 1 Records found.
52 \retval 0 No record found.
53 */
54 int ReadLogFile(struct ReadLogDataStruct *Filter)
55 {
56 enum isa_col_id {
57 ISACOL_Ip,
58 ISACOL_UserName,
59 ISACOL_Date,
60 ISACOL_Time,
61 ISACOL_TimeTaken,
62 ISACOL_Bytes,
63 ISACOL_Uri,
64 ISACOL_Status,
65 ISACOL_Last //last entry of the list !
66 };
67 enum InputLogFormat {
68 ILF_Unknown,
69 ILF_Squid,
70 ILF_Common,
71 ILF_Sarg,
72 ILF_Isa,
73 ILF_Last //last entry of the list !
74 };
75
76 enum InputLogFormat ilf;
77 int ilf_count[ILF_Last];
78 longline line;
79 char *linebuf;
80 char *str;
81 char arq_log[255];
82 char fun[MAXLEN];
83 char elap[255];
84 char user[MAX_USER_LEN];
85 char data[255];
86 char ip[60];
87 char hora[30];
88 char mes[30];
89 char tbuf2[128];
90 char dia[128]="";
91 char wuser[MAXLEN];
92 char tmp3[MAXLEN];
93 char sz_Download_Unsort[20000];
94 char start_hour[128];
95 char download_url[MAXLEN];
96 char smartfilter[MAXLEN];
97 const char *arq;
98 const char *url;
99 int iarq;
100 int blen;
101 int OutputNonZero = REPORT_EVERY_X_LINES ;
102 int idata=0;
103 int isa_ncols=0,isa_cols[ISACOL_Last];
104 int x;
105 int hmr;
106 int nopen;
107 int maxopenfiles=MAX_OPEN_USER_FILES;
108 int mindate=0;
109 int maxdate=0;
110 int cstatus;
111 unsigned long int recs1=0UL;
112 unsigned long int recs2=0UL;
113 long int totregsl=0;
114 long int totregsg=0;
115 long int totregsx=0;
116 long long int iyear, imonth, iday;
117 FILE *fp_in=NULL;
118 FILE *fp_log=NULL;
119 FILE *fp_Download_Unsort=NULL;
120 bool from_pipe;
121 bool from_stdin;
122 bool download_flag=false;
123 bool id_is_ip;
124 bool totper=false;
125 struct stat logstat;
126 struct getwordstruct gwarea;
127 struct tm tt;
128 struct userfilestruct *prev_ufile;
129 struct userinfostruct *uinfo;
130 struct userfilestruct *first_user_file=NULL;
131 struct userfilestruct *ufile;
132 struct userfilestruct *ufile1;
133 struct ReadLogStruct log_entry;
134 time_t tnum;
135
136 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
137 tmp3[0]='\0';
138 start_hour[0]='\0';
139 first_user_file=NULL;
140
141 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
142
143 if(DataFile[0]=='\0') {
144 denied_open();
145 authfail_open();
146 }
147
148 if ((line=longline_create())==NULL) {
149 debuga(_("Not enough memory to read a log file\n"));
150 exit(EXIT_FAILURE);
151 }
152
153 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
154 arq=AccessLog[iarq];
155
156 if(strcmp(arq,"-")==0) {
157 if(debug)
158 debuga(_("Reading access log file: from stdin\n"));
159 fp_in=stdin;
160 from_stdin=true;
161 } else {
162 if (Filter->DateRange[0]!='\0') {
163 if (stat(arq,&logstat)!=0) {
164 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
165 } else {
166 struct tm *logtime=localtime(&logstat.st_mtime);
167 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
168 debuga(_("Ignoring old log file %s\n"),arq);
169 continue;
170 }
171 }
172 }
173 fp_in=decomp(arq,&from_pipe);
174 if(fp_in==NULL) {
175 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
176 exit(EXIT_FAILURE);
177 }
178 if(debug) debuga(_("Reading access log file: %s\n"),arq);
179 from_stdin=false;
180 }
181
182 ilf=ILF_Unknown;
183 download_flag=false;
184
185 recs1=0UL;
186 recs2=0UL;
187
188 // pre-read the file only if we have to show stats
189 if (ShowReadStatistics && ShowReadPercent && !from_stdin && !from_pipe) {
190 size_t nread,i;
191 bool skipcr=false;
192 char tmp4[MAXLEN];
193
194 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
195 for (i=0 ; i<nread ; i++)
196 if (skipcr) {
197 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
198 skipcr=false;
199 }
200 } else {
201 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
202 skipcr=true;
203 recs1++;
204 }
205 }
206 }
207 rewind(fp_in);
208 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
209 putchar('\r');
210 fflush( stdout ) ;
211 }
212
213 longline_reset(line);
214
215 while ((linebuf=longline_read(fp_in,line))!=NULL) {
216 blen=strlen(linebuf);
217
218 if (ilf==ILF_Unknown) {
219 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
220 fixendofline(linebuf);
221 if (debug)
222 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
223 ilf=ILF_Isa;
224 ilf_count[ilf]++;
225 continue;
226 }
227
228 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
229 if (getperiod_fromsarglog(arq,&period)<0) {
230 debuga(_("The name of the file is invalid: %s\n"),arq);
231 exit(EXIT_FAILURE);
232 }
233 ilf=ILF_Sarg;
234 ilf_count[ilf]++;
235 continue;
236 }
237 }
238
239 if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
240 if(access(ParsedOutputLog,R_OK) != 0) {
241 my_mkdir(ParsedOutputLog);
242 }
243 if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
244 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
245 exit(EXIT_FAILURE);
246 }
247 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
248 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
249 exit(EXIT_FAILURE);
250 }
251 fputs("*** SARG Log ***\n",fp_log);
252 }
253
254 recs2++;
255 if (ShowReadStatistics && --OutputNonZero<=0) {
256 if (recs1>0) {
257 double perc = recs2 * 100. / recs1 ;
258 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
259 } else {
260 printf(_("SARG: Records in file: %lu"),recs2);
261 }
262 putchar('\r');
263 fflush (stdout);
264 OutputNonZero = REPORT_EVERY_X_LINES ;
265 }
266 if(blen < 58) continue;
267 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
268 if(strstr(linebuf,"logfile turned over") != 0) continue;
269 if(linebuf[0] == ' ') continue;
270
271 // exclude_string
272 if(ExcludeString[0] != '\0') {
273 bool exstring=false;
274 getword_start(&gwarea,ExcludeString);
275 while(strchr(gwarea.current,':') != 0) {
276 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
277 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
278 exit(EXIT_FAILURE);
279 }
280 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
281 exstring=true;
282 break;
283 }
284 }
285 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
286 exstring=true;
287 if(exstring) continue;
288 }
289
290 totregsl++;
291 if(debugm)
292 printf("BUF=%s\n",linebuf);
293
294 memset(&log_entry,0,sizeof(log_entry));
295 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
296 getword_start(&gwarea,linebuf);
297 if (getword(data,sizeof(data),&gwarea,' ')<0) {
298 debuga(_("Maybe you have a broken time in your access.log file\n"));
299 exit(EXIT_FAILURE);
300 }
301 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
302 strcpy(ip,data);
303 log_entry.Ip=ip;
304 if(squid24) {
305 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
306 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
307 exit(EXIT_FAILURE);
308 }
309 } else {
310 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
311 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
312 exit(EXIT_FAILURE);
313 }
314 }
315 log_entry.User=user;
316 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
317 getword(fun,sizeof(fun),&gwarea,' ')<0) {
318 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
319 exit(EXIT_FAILURE);
320 }
321 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0) {
322 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
323 exit(EXIT_FAILURE);
324 }
325 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
326 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
327 exit(EXIT_FAILURE);
328 }
329 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
330 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
331 exit(EXIT_FAILURE);
332 }
333 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
334 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
335 exit(EXIT_FAILURE);
336 }
337 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
338 if (getword(code,sizeof(code),&gwarea,' ')<0) {
339 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
340 exit(EXIT_FAILURE);
341 }
342 } else {
343 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
344 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
345 exit(EXIT_FAILURE);
346 }
347 }
348
349 if ((str = strchr(code, ':')) != NULL)
350 *str = '/';
351 log_entry.HttpCode=code;
352
353 ilf=ILF_Common;
354 ilf_count[ilf]++;
355
356 getword_start(&gwarea,data+1);
357 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
358 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
359 exit(EXIT_FAILURE);
360 }
361 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
362 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
363 exit(EXIT_FAILURE);
364 }
365 getword_start(&gwarea,data);
366 if (getword_atoll(&iday,&gwarea,'/')<0){
367 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
368 exit(EXIT_FAILURE);
369 }
370 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
371 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
372 exit(EXIT_FAILURE);
373 }
374 if (getword_atoll(&iyear,&gwarea,'/')<0){
375 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
376 exit(EXIT_FAILURE);
377 }
378
379 imonth=month2num(mes)+1;
380 idata=builddia(iday,imonth,iyear);
381 computedate(iyear,imonth,iday,&tt);
382 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
383 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
384 debuga(_("Invalid time found in %s\n"),arq);
385 exit(EXIT_FAILURE);
386 }
387 log_entry.EntryTime=&tt;
388 }
389
390 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
391 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
392 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
393 exit(EXIT_FAILURE);
394 }
395 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
396 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
397 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
398 exit(EXIT_FAILURE);
399 }
400 if(strlen(elap) < 1) continue;
401 log_entry.ElapsedTime=atol(elap);
402 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
403 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
404 exit(EXIT_FAILURE);
405 }
406 log_entry.Ip=ip;
407 if (getword(code,sizeof(code),&gwarea,' ')<0){
408 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
409 exit(EXIT_FAILURE);
410 }
411 log_entry.HttpCode=code;
412 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
413 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
414 exit(EXIT_FAILURE);
415 }
416 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
417 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
418 exit(EXIT_FAILURE);
419 }
420 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0){
421 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
422 exit(EXIT_FAILURE);
423 }
424 if (getword(user,sizeof(user),&gwarea,' ')<0){
425 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
426 exit(EXIT_FAILURE);
427 }
428 log_entry.User=user;
429 ilf=ILF_Squid;
430 ilf_count[ilf]++;
431
432 tnum=atoi(data);
433 log_entry.EntryTime=localtime(&tnum);
434 if (log_entry.EntryTime == NULL) {
435 debuga(_("Cannot convert the timestamp from the squid log file\n"));
436 exit(EXIT_FAILURE);
437 }
438
439 strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
440
441 idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
442 }
443 }
444 if (ilf==ILF_Sarg) {
445 getword_start(&gwarea,linebuf);
446 if (getword(data,sizeof(data),&gwarea,'\t')<0){
447 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
448 exit(EXIT_FAILURE);
449 }
450 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
451 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
452 exit(EXIT_FAILURE);
453 }
454 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
455 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
456 exit(EXIT_FAILURE);
457 }
458 log_entry.User=user;
459 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
460 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
461 exit(EXIT_FAILURE);
462 }
463 log_entry.Ip=ip;
464 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,'\t')<0){
465 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
466 exit(EXIT_FAILURE);
467 }
468 if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
469 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
470 exit(EXIT_FAILURE);
471 }
472 if (getword(code,sizeof(code),&gwarea,'\t')<0){
473 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
474 exit(EXIT_FAILURE);
475 }
476 log_entry.HttpCode=code;
477 if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
478 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
479 exit(EXIT_FAILURE);
480 }
481 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
482 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
483 exit(EXIT_FAILURE);
484 }
485 getword_start(&gwarea,data);
486 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
487 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
488 exit(EXIT_FAILURE);
489 }
490 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
491 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
492 exit(EXIT_FAILURE);
493 }
494 if (getword_atoll(&iyear,&gwarea,'\0')<0){
495 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
496 exit(EXIT_FAILURE);
497 }
498 idata=builddia(iday,imonth,iyear);
499 computedate(iyear,imonth,iday,&tt);
500 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
501 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
502 debuga(_("Invalid time found in %s\n"),arq);
503 exit(EXIT_FAILURE);
504 }
505 log_entry.EntryTime=&tt;
506 }
507 if (ilf==ILF_Isa) {
508 if (linebuf[0] == '#') {
509 int ncols,cols[ISACOL_Last];
510
511 fixendofline(linebuf);
512 getword_start(&gwarea,linebuf);
513 // remove the #Fields: column at the beginning of the line
514 if (getword_skip(1000,&gwarea,' ')<0){
515 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
516 exit(EXIT_FAILURE);
517 }
518 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
519 ncols=0;
520 while(gwarea.current[0] != '\0') {
521 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
522 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
523 exit(EXIT_FAILURE);
524 }
525 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
526 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
527 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
528 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
529 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
530 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
531 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
532 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
533 ncols++;
534 }
535 if (cols[ISACOL_Ip]>=0) {
536 isa_ncols=ncols;
537 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
538 isa_cols[ncols]=cols[ncols];
539 }
540 continue;
541 }
542 if (!isa_ncols) continue;
543 getword_start(&gwarea,linebuf);
544 for (x=0 ; x<isa_ncols ; x++) {
545 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
546 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
547 exit(EXIT_FAILURE);
548 }
549 if (x==isa_cols[ISACOL_Ip]) {
550 if (strlen(str)>=sizeof(ip)) {
551 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
552 exit(EXIT_FAILURE);
553 }
554 strcpy(ip,str);
555 log_entry.Ip=ip;
556 } else if (x==isa_cols[ISACOL_UserName]) {
557 if (strlen(str)>=sizeof(user)) {
558 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
559 exit(EXIT_FAILURE);
560 }
561 strcpy(user,str);
562 log_entry.User=user;
563 } else if (x==isa_cols[ISACOL_Date]) {
564 if (strlen(str)>=sizeof(data)) {
565 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
566 exit(EXIT_FAILURE);
567 }
568 strcpy(data,str);
569 } else if (x==isa_cols[ISACOL_Time]) {
570 if (strlen(str)>=sizeof(hora)) {
571 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
572 exit(EXIT_FAILURE);
573 }
574 strcpy(hora,str);
575 } else if (x==isa_cols[ISACOL_TimeTaken]) {
576 log_entry.ElapsedTime=atol(str);
577 } else if (x==isa_cols[ISACOL_Bytes]) {
578 log_entry.DataSize=atoll(str);
579 } else if (x==isa_cols[ISACOL_Uri]) {
580 log_entry.Url=str;
581 } else if (x==isa_cols[ISACOL_Status]) {
582 if (strlen(str)>=sizeof(code)) {
583 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
584 exit(EXIT_FAILURE);
585 }
586 strcpy(code,str);
587 }
588 }
589
590 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
591 static char valcode[12];
592 sprintf(valcode,"DENIED/%s",code);
593 log_entry.HttpCode=valcode;
594 } else {
595 log_entry.HttpCode=code;
596 }
597 getword_start(&gwarea,data);
598 if (getword_atoll(&iyear,&gwarea,'-')<0){
599 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
600 exit(EXIT_FAILURE);
601 }
602 if (getword_atoll(&imonth,&gwarea,'-')<0){
603 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
604 exit(EXIT_FAILURE);
605 }
606 if (getword_atoll(&iday,&gwarea,'\0')<0){
607 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
608 exit(EXIT_FAILURE);
609 }
610
611 idata=builddia(iday,imonth,iyear);
612 computedate(iyear,imonth,iday,&tt);
613 if (isa_cols[ISACOL_Time]>=0) {
614 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
615 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
616 debuga(_("Invalid time found in %s\n"),arq);
617 exit(EXIT_FAILURE);
618 }
619 }
620 log_entry.EntryTime=&tt;
621 }
622 if (log_entry.EntryTime==NULL) {
623 debuga(_("Unknown input log file format: no time\n"));
624 break;
625 }
626 if (log_entry.Ip==NULL) {
627 debuga(_("Unknown input log file format: no IP addresses\n"));
628 break;
629 }
630 if (log_entry.User==NULL) {
631 debuga(_("Unknown input log file format: no user\n"));
632 break;
633 }
634 if (log_entry.Url==NULL) {
635 debuga(_("Unknown input log file format: no URL\n"));
636 break;
637 }
638
639 if(debugm)
640 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
641
642 if(Filter->DateRange[0] != '\0'){
643 if(idata < dfrom || idata > duntil) continue;
644 }
645
646 // Record only hours usage which is required
647 if( bsearch( &( log_entry.EntryTime->tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
648 continue;
649
650 if( bsearch( &( log_entry.EntryTime->tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
651 continue;
652
653
654 if(strlen(log_entry.User) > MAX_USER_LEN) {
655 if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
656 totregsx++;
657 continue;
658 }
659
660 // include_users
661 if(IncludeUsers[0] != '\0') {
662 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
663 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
664 continue;
665 }
666
667 if(vercode(log_entry.HttpCode)) {
668 if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
669 totregsx++;
670 continue;
671 }
672
673 if(testvaliduserchar(log_entry.User))
674 continue;
675
676 // replace any tab by a single space
677 for (str=log_entry.Url ; *str ; str++)
678 if (*str=='\t') *str=' ';
679 for (str=log_entry.HttpCode ; *str ; str++)
680 if (*str=='\t') *str=' ';
681
682 if(ilf!=ILF_Sarg) {
683 /*
684 The full URL is not saved in sarg log. There is no point in testing the URL to detect
685 a downloaded file.
686 */
687 download_flag=is_download_suffix(log_entry.Url);
688 if (download_flag) {
689 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
690 download_count++;
691 }
692 } else
693 download_flag=false;
694
695 url=process_url(log_entry.Url,LongUrl);
696 if (!url || url[0] == '\0') continue;
697
698 if(addr[0] != '\0'){
699 if(strcmp(addr,log_entry.Ip)!=0) continue;
700 }
701 if(Filter->HostFilter) {
702 if(!vhexclude(url)) {
703 if (debugm) printf(_("Excluded site: %s\n"),url);
704 totregsx++;
705 continue;
706 }
707 }
708
709 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
710 hmr=log_entry.EntryTime->tm_hour*100+log_entry.EntryTime->tm_min;
711 if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
712 }
713
714 if(site[0] != '\0'){
715 if(strstr(url,site)==0) continue;
716 }
717
718 if(UserIp) {
719 log_entry.User=log_entry.Ip;
720 id_is_ip=true;
721 } else {
722 id_is_ip=false;
723 if(strcmp(log_entry.User,"-") == 0 || strcmp(log_entry.User," ") == 0 || strcmp(log_entry.User,"") == 0) {
724 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
725 log_entry.User=log_entry.Ip;
726 id_is_ip=true;
727 }
728 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
729 continue;
730 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
731 log_entry.User="everybody";
732 } else {
733 strlow(log_entry.User);
734 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
735 if ((str=strchr(user,'+'))!=NULL || (str=strchr(user,'\\'))!=NULL || (str=strchr(user,'_'))!=NULL) {
736 log_entry.User=str+1;
737 }
738 }
739 }
740 }
741
742 if(us[0] != '\0'){
743 if(strcmp(log_entry.User,us)!=0) continue;
744 }
745
746 if(Filter->SysUsers) {
747 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
748 if(strstr(userfile, wuser) == 0)
749 continue;
750 }
751
752 if(Filter->UserFilter) {
753 if(!vuexclude(log_entry.User)) {
754 if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
755 totregsx++;
756 continue;
757 }
758 }
759
760 if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 ||
761 strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0)
762 continue;
763
764 if (log_entry.DataSize<0) log_entry.DataSize=0;
765
766 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
767 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
768 log_entry.ElapsedTime=0;
769 }
770
771 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
772 fixendofline(str);
773 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
774 } else strcpy(smartfilter,"\"\"");
775
776 nopen=0;
777 prev_ufile=NULL;
778 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
779 prev_ufile=ufile;
780 if (ufile->file) nopen++;
781 }
782 if (!ufile) {
783 ufile=malloc(sizeof(*ufile));
784 if (!ufile) {
785 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
786 exit(EXIT_FAILURE);
787 }
788 memset(ufile,0,sizeof(*ufile));
789 ufile->next=first_user_file;
790 first_user_file=ufile;
791 uinfo=userinfo_create(log_entry.User);
792 ufile->user=uinfo;
793 uinfo->id_is_ip=id_is_ip;
794 } else {
795 if (prev_ufile) {
796 prev_ufile->next=ufile->next;
797 ufile->next=first_user_file;
798 first_user_file=ufile;
799 }
800 }
801 #ifdef ENABLE_DOUBLE_CHECK_DATA
802 ufile->user->nbytes+=log_entry.DataSize;
803 ufile->user->elap+=log_entry.ElapsedTime;
804 #endif
805
806 if (ufile->file==NULL) {
807 if (nopen>=maxopenfiles) {
808 x=0;
809 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
810 if (ufile1->file!=NULL) {
811 if (x>=maxopenfiles) {
812 if (fclose(ufile1->file)==EOF) {
813 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
814 exit(EXIT_FAILURE);
815 }
816 ufile1->file=NULL;
817 }
818 x++;
819 }
820 }
821 }
822 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
823 debuga(_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
824 exit(EXIT_FAILURE);
825 }
826 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
827 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
828 exit (1);
829 }
830 }
831
832 strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime);
833 strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime);
834
835 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
836 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
837 exit(EXIT_FAILURE);
838 }
839
840 if(fp_log && ilf!=ILF_Sarg)
841 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
842
843 totregsg++;
844
845 if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
846 ndownload = 1;
847
848 if ( ! fp_Download_Unsort ) {
849 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
850 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
851 exit (1);
852 }
853 }
854 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url);
855 }
856
857 denied_write(&log_entry);
858 authfail_write(&log_entry);
859
860 if (ilf!=ILF_Sarg) {
861 if(!totper || idata<mindate){
862 mindate=idata;
863 memcpy(&period.start,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
864 strcpy(start_hour,tbuf2);
865 }
866 if (!totper || idata>maxdate) {
867 maxdate=idata;
868 memcpy(&period.end,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
869 }
870 totper=true;
871 }
872
873 if(debugm){
874 printf("IP=\t%s\n",log_entry.Ip);
875 printf("USER=\t%s\n",log_entry.User);
876 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
877 printf("DATE=\t%s\n",dia);
878 printf("TIME=\t%s\n",hora);
879 printf("FUNC=\t%s\n",fun);
880 printf("URL=\t%s\n",url);
881 printf("CODE=\t%s\n",log_entry.HttpCode);
882 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
883 }
884 }
885
886 if (!from_stdin) {
887 if (from_pipe)
888 pclose(fp_in);
889 else {
890 fclose(fp_in);
891 if (ShowReadStatistics) {
892 if (ShowReadPercent)
893 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
894 else
895 printf(_("SARG: Records in file: %lu\n"),recs2);
896 }
897 }
898 }
899 }
900 longline_destroy(&line);
901
902 if(fp_log != NULL) {
903 char end_hour[128];
904 char val2[40];
905 char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
906
907 fclose(fp_log);
908 safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
909 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
910 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
911 if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
912 debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
913 exit(EXIT_FAILURE);
914 }
915 if (rename(arq_log,val4)) {
916 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
917 } else {
918 strcpy(arq_log,val4);
919
920 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
921 /*
922 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
923 necessary around the command name, put them in the configuration file.
924 */
925 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
926 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
927 exit(EXIT_FAILURE);
928 }
929 cstatus=system(val1);
930 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
931 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
932 debuga(_("command: %s\n"),val1);
933 exit(EXIT_FAILURE);
934 }
935 }
936 }
937 if(debug)
938 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
939 }
940
941 denied_close();
942 authfail_close();
943 if (fp_Download_Unsort) fclose (fp_Download_Unsort);
944
945 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
946 ufile1=ufile->next;
947 if (ufile->file!=NULL) fclose(ufile->file);
948 free(ufile);
949 }
950
951 if (debug) {
952 int totalcount=0;
953
954 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
955
956 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
957
958 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
959 debuga(_("Log with mixed records format (squid and common log)\n"));
960
961 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
962 debuga(_("Common log format\n"));
963
964 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
965 debuga(_("Squid log format\n"));
966
967 if(ilf_count[ILF_Sarg]>0)
968 debuga(_("Sarg log format\n"));
969
970 if(totalcount==0 && totregsg)
971 debuga(_("Log with invalid format\n"));
972 }
973
974 if(debugz){
975 debugaz(_("date=%s\n"),dia);
976 debugaz(_("period=%s\n"),period.text);
977 }
978
979 return((totregsg!=0) ? 1 : 0);
980 }