]> git.ipfire.org Git - thirdparty/sarg.git/blob - readlog.c
Merge remote branch 'origin/v2.3'
[thirdparty/sarg.git] / readlog.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30
/* Number of log lines read between two refreshes of the progress display. */
#define REPORT_EVERY_X_LINES 5000
/* Upper bound on the per-user temporary files kept open at the same time. */
#define MAX_OPEN_USER_FILES 10
33
/*
One node of the singly linked list of per-user temporary output files
maintained while the logs are read.
*/
struct userfilestruct
{
	/* Next node of the list or NULL at the end of the list. */
	struct userfilestruct *next;
	/* The user whose entries are written through this node. */
	struct userinfostruct *user;
	/* The stream of the user's temporary file or NULL when it is not open. */
	FILE *file;
};
40
41 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
42 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
43
44 extern char *userfile;
45
46 /*!
47 Read the log files.
48
49 \param Filter The filtering parameters for the file to load.
50
51 \retval 1 Records found.
52 \retval 0 No record found.
53 */
54 int ReadLogFile(struct ReadLogDataStruct *Filter)
55 {
56 enum isa_col_id {
57 ISACOL_Ip,
58 ISACOL_UserName,
59 ISACOL_Date,
60 ISACOL_Time,
61 ISACOL_TimeTaken,
62 ISACOL_Bytes,
63 ISACOL_Uri,
64 ISACOL_Status,
65 ISACOL_Last //last entry of the list !
66 };
67 enum InputLogFormat {
68 ILF_Unknown,
69 ILF_Squid,
70 ILF_Common,
71 ILF_Sarg,
72 ILF_Isa,
73 ILF_Last //last entry of the list !
74 };
75
76 enum InputLogFormat ilf;
77 int ilf_count[ILF_Last];
78 longline line;
79 char *linebuf;
80 char *str;
81 char arq_log[255];
82 char fun[MAXLEN];
83 char elap[255];
84 char user[MAX_USER_LEN];
85 char data[255];
86 char ip[60];
87 char hora[30];
88 char mes[30];
89 char tbuf2[128];
90 char dia[128]="";
91 char wuser[MAXLEN];
92 char tmp3[MAXLEN];
93 char sz_Download_Unsort[20000];
94 char start_hour[128];
95 char authfail_unsort[MAXLEN];
96 char download_url[MAXLEN];
97 char smartfilter[MAXLEN];
98 const char *arq;
99 const char *url;
100 int iarq;
101 int blen;
102 int OutputNonZero = REPORT_EVERY_X_LINES ;
103 int idata=0;
104 int isa_ncols=0,isa_cols[ISACOL_Last];
105 int x;
106 int hmr;
107 int nopen;
108 int maxopenfiles=MAX_OPEN_USER_FILES;
109 int mindate=0;
110 int maxdate=0;
111 int cstatus;
112 unsigned long int recs1=0UL;
113 unsigned long int recs2=0UL;
114 long int totregsl=0;
115 long int totregsg=0;
116 long int totregsx=0;
117 long long int iyear, imonth, iday;
118 FILE *fp_in=NULL;
119 FILE *fp_log=NULL;
120 FILE *fp_authfail=NULL;
121 FILE *fp_Download_Unsort=NULL;
122 bool from_pipe;
123 bool from_stdin;
124 bool download_flag=false;
125 bool id_is_ip;
126 bool totper=false;
127 struct stat logstat;
128 struct getwordstruct gwarea;
129 struct tm tt;
130 struct userfilestruct *prev_ufile;
131 struct userinfostruct *uinfo;
132 struct userfilestruct *first_user_file=NULL;
133 struct userfilestruct *ufile;
134 struct userfilestruct *ufile1;
135 struct ReadLogStruct log_entry;
136 time_t tnum;
137
138 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
139 tmp3[0]='\0';
140 start_hour[0]='\0';
141 first_user_file=NULL;
142
143 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.int_unsort", tmp);
144 snprintf(authfail_unsort,sizeof(authfail_unsort),"%s/authfail.int_unsort",tmp);
145
146 if(DataFile[0]=='\0') {
147 denied_open();
148
149 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
150 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
151 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
152 exit(EXIT_FAILURE);
153 }
154 }
155 }
156
157 if ((line=longline_create())==NULL) {
158 debuga(_("Not enough memory to read a log file\n"));
159 exit(EXIT_FAILURE);
160 }
161
162 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
163 arq=AccessLog[iarq];
164
165 if(strcmp(arq,"-")==0) {
166 if(debug)
167 debuga(_("Reading access log file: from stdin\n"));
168 fp_in=stdin;
169 from_stdin=true;
170 } else {
171 if (Filter->DateRange[0]!='\0') {
172 if (stat(arq,&logstat)!=0) {
173 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
174 } else {
175 struct tm *logtime=localtime(&logstat.st_mtime);
176 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
177 debuga(_("Ignoring old log file %s\n"),arq);
178 continue;
179 }
180 }
181 }
182 fp_in=decomp(arq,&from_pipe);
183 if(fp_in==NULL) {
184 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
185 exit(EXIT_FAILURE);
186 }
187 if(debug) debuga(_("Reading access log file: %s\n"),arq);
188 from_stdin=false;
189 }
190
191 ilf=ILF_Unknown;
192 download_flag=false;
193
194 // pre-read the file only if we have to show stats
195 if (ShowReadStatistics && !from_stdin && !from_pipe) {
196 size_t nread,i;
197 bool skipcr=false;
198 char tmp4[MAXLEN];
199
200 recs1=0UL;
201 recs2=0UL;
202
203 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
204 for (i=0 ; i<nread ; i++)
205 if (skipcr) {
206 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
207 skipcr=false;
208 }
209 } else {
210 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
211 skipcr=true;
212 recs1++;
213 }
214 }
215 }
216 rewind(fp_in);
217 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
218 putchar('\r');
219 fflush( stdout ) ;
220 }
221
222 longline_reset(line);
223
224 while ((linebuf=longline_read(fp_in,line))!=NULL) {
225 blen=strlen(linebuf);
226
227 if (ilf==ILF_Unknown) {
228 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
229 fixendofline(linebuf);
230 if (debug)
231 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
232 ilf=ILF_Isa;
233 ilf_count[ilf]++;
234 continue;
235 }
236
237 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
238 if (getperiod_fromsarglog(arq,&period)<0) {
239 debuga(_("The name of the file is invalid: %s\n"),arq);
240 exit(EXIT_FAILURE);
241 }
242 ilf=ILF_Sarg;
243 ilf_count[ilf]++;
244 continue;
245 }
246 }
247
248 if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
249 if(access(ParsedOutputLog,R_OK) != 0) {
250 my_mkdir(ParsedOutputLog);
251 }
252 if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
253 debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
254 exit(EXIT_FAILURE);
255 }
256 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
257 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
258 exit(EXIT_FAILURE);
259 }
260 fputs("*** SARG Log ***\n",fp_log);
261 }
262
263 recs2++;
264 if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
265 double perc = recs2 * 100. / recs1 ;
266 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
267 putchar('\r');
268 fflush (stdout);
269 OutputNonZero = REPORT_EVERY_X_LINES ;
270 }
271 if(blen < 58) continue;
272 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
273 if(strstr(linebuf,"logfile turned over") != 0) continue;
274 if(linebuf[0] == ' ') continue;
275
276 // exclude_string
277 if(ExcludeString[0] != '\0') {
278 bool exstring=false;
279 getword_start(&gwarea,ExcludeString);
280 while(strchr(gwarea.current,':') != 0) {
281 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
282 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
283 exit(EXIT_FAILURE);
284 }
285 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
286 exstring=true;
287 break;
288 }
289 }
290 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
291 exstring=true;
292 if(exstring) continue;
293 }
294
295 totregsl++;
296 if(debugm)
297 printf("BUF=%s\n",linebuf);
298
299 memset(&log_entry,0,sizeof(log_entry));
300 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
301 getword_start(&gwarea,linebuf);
302 if (getword(data,sizeof(data),&gwarea,' ')<0) {
303 debuga(_("Maybe you have a broken time in your access.log file\n"));
304 exit(EXIT_FAILURE);
305 }
306 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
307 strcpy(ip,data);
308 log_entry.Ip=ip;
309 if(squid24) {
310 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
311 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
312 exit(EXIT_FAILURE);
313 }
314 } else {
315 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
316 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
317 exit(EXIT_FAILURE);
318 }
319 }
320 log_entry.User=user;
321 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
322 getword(fun,sizeof(fun),&gwarea,' ')<0) {
323 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
324 exit(EXIT_FAILURE);
325 }
326 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0) {
327 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
328 exit(EXIT_FAILURE);
329 }
330 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
331 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
332 exit(EXIT_FAILURE);
333 }
334 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
335 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
336 exit(EXIT_FAILURE);
337 }
338 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0) {
339 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
340 exit(EXIT_FAILURE);
341 }
342 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
343 if (getword(code,sizeof(code),&gwarea,' ')<0) {
344 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
345 exit(EXIT_FAILURE);
346 }
347 } else {
348 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
349 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
350 exit(EXIT_FAILURE);
351 }
352 }
353
354 if ((str = strchr(code, ':')) != NULL)
355 *str = '/';
356 log_entry.HttpCode=code;
357
358 ilf=ILF_Common;
359 ilf_count[ilf]++;
360
361 getword_start(&gwarea,data+1);
362 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
363 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
364 exit(EXIT_FAILURE);
365 }
366 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
367 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
368 exit(EXIT_FAILURE);
369 }
370 getword_start(&gwarea,data);
371 if (getword_atoll(&iday,&gwarea,'/')<0){
372 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
373 exit(EXIT_FAILURE);
374 }
375 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
376 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
377 exit(EXIT_FAILURE);
378 }
379 if (getword_atoll(&iyear,&gwarea,'/')<0){
380 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
381 exit(EXIT_FAILURE);
382 }
383
384 imonth=month2num(mes)+1;
385 idata=builddia(iday,imonth,iyear);
386 computedate(iyear,imonth,iday,&tt);
387 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
388 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
389 debuga(_("Invalid time found in %s\n"),arq);
390 exit(EXIT_FAILURE);
391 }
392 log_entry.EntryTime=&tt;
393 }
394
395 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
396 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
397 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
398 exit(EXIT_FAILURE);
399 }
400 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
401 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
402 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
403 exit(EXIT_FAILURE);
404 }
405 if(strlen(elap) < 1) continue;
406 log_entry.ElapsedTime=atol(elap);
407 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
408 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
409 exit(EXIT_FAILURE);
410 }
411 log_entry.Ip=ip;
412 if (getword(code,sizeof(code),&gwarea,' ')<0){
413 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
414 exit(EXIT_FAILURE);
415 }
416 log_entry.HttpCode=code;
417 if (getword_atoll(&log_entry.DataSize,&gwarea,' ')<0){
418 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
419 exit(EXIT_FAILURE);
420 }
421 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
422 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
423 exit(EXIT_FAILURE);
424 }
425 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,' ')<0){
426 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
427 exit(EXIT_FAILURE);
428 }
429 if (getword(user,sizeof(user),&gwarea,' ')<0){
430 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
431 exit(EXIT_FAILURE);
432 }
433 log_entry.User=user;
434 ilf=ILF_Squid;
435 ilf_count[ilf]++;
436
437 tnum=atoi(data);
438 log_entry.EntryTime=localtime(&tnum);
439 if (log_entry.EntryTime == NULL) {
440 debuga(_("Cannot convert the timestamp from the squid log file\n"));
441 exit(EXIT_FAILURE);
442 }
443
444 strftime(tbuf2, sizeof(tbuf2), "%H%M", log_entry.EntryTime);
445
446 idata=(log_entry.EntryTime->tm_year+1900)*10000+(log_entry.EntryTime->tm_mon+1)*100+log_entry.EntryTime->tm_mday;
447 }
448 }
449 if (ilf==ILF_Sarg) {
450 getword_start(&gwarea,linebuf);
451 if (getword(data,sizeof(data),&gwarea,'\t')<0){
452 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
453 exit(EXIT_FAILURE);
454 }
455 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
456 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
457 exit(EXIT_FAILURE);
458 }
459 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
460 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
461 exit(EXIT_FAILURE);
462 }
463 log_entry.User=user;
464 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
465 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
466 exit(EXIT_FAILURE);
467 }
468 log_entry.Ip=ip;
469 if (getword_ptr(linebuf,&log_entry.Url,&gwarea,'\t')<0){
470 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
471 exit(EXIT_FAILURE);
472 }
473 if (getword_atoll(&log_entry.DataSize,&gwarea,'\t')<0){
474 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
475 exit(EXIT_FAILURE);
476 }
477 if (getword(code,sizeof(code),&gwarea,'\t')<0){
478 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
479 exit(EXIT_FAILURE);
480 }
481 log_entry.HttpCode=code;
482 if (getword_atol(&log_entry.ElapsedTime,&gwarea,'\t')<0){
483 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
484 exit(EXIT_FAILURE);
485 }
486 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
487 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
488 exit(EXIT_FAILURE);
489 }
490 getword_start(&gwarea,data);
491 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
492 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
493 exit(EXIT_FAILURE);
494 }
495 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
496 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
497 exit(EXIT_FAILURE);
498 }
499 if (getword_atoll(&iyear,&gwarea,'\0')<0){
500 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
501 exit(EXIT_FAILURE);
502 }
503 idata=builddia(iday,imonth,iyear);
504 computedate(iyear,imonth,iday,&tt);
505 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
506 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
507 debuga(_("Invalid time found in %s\n"),arq);
508 exit(EXIT_FAILURE);
509 }
510 log_entry.EntryTime=&tt;
511 }
512 if (ilf==ILF_Isa) {
513 if (linebuf[0] == '#') {
514 int ncols,cols[ISACOL_Last];
515
516 fixendofline(linebuf);
517 getword_start(&gwarea,linebuf);
518 // remove the #Fields: column at the beginning of the line
519 if (getword_skip(1000,&gwarea,' ')<0){
520 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
521 exit(EXIT_FAILURE);
522 }
523 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
524 ncols=0;
525 while(gwarea.current[0] != '\0') {
526 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
527 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
528 exit(EXIT_FAILURE);
529 }
530 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
531 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
532 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
533 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
534 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
535 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
536 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
537 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
538 ncols++;
539 }
540 if (cols[ISACOL_Ip]>=0) {
541 isa_ncols=ncols;
542 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
543 isa_cols[ncols]=cols[ncols];
544 }
545 continue;
546 }
547 if (!isa_ncols) continue;
548 getword_start(&gwarea,linebuf);
549 for (x=0 ; x<isa_ncols ; x++) {
550 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
551 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
552 exit(EXIT_FAILURE);
553 }
554 if (x==isa_cols[ISACOL_Ip]) {
555 if (strlen(str)>=sizeof(ip)) {
556 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
557 exit(EXIT_FAILURE);
558 }
559 strcpy(ip,str);
560 log_entry.Ip=ip;
561 } else if (x==isa_cols[ISACOL_UserName]) {
562 if (strlen(str)>=sizeof(user)) {
563 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
564 exit(EXIT_FAILURE);
565 }
566 strcpy(user,str);
567 log_entry.User=user;
568 } else if (x==isa_cols[ISACOL_Date]) {
569 if (strlen(str)>=sizeof(data)) {
570 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
571 exit(EXIT_FAILURE);
572 }
573 strcpy(data,str);
574 } else if (x==isa_cols[ISACOL_Time]) {
575 if (strlen(str)>=sizeof(hora)) {
576 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
577 exit(EXIT_FAILURE);
578 }
579 strcpy(hora,str);
580 } else if (x==isa_cols[ISACOL_TimeTaken]) {
581 log_entry.ElapsedTime=atol(str);
582 } else if (x==isa_cols[ISACOL_Bytes]) {
583 log_entry.DataSize=atoll(str);
584 } else if (x==isa_cols[ISACOL_Uri]) {
585 log_entry.Url=str;
586 } else if (x==isa_cols[ISACOL_Status]) {
587 if (strlen(str)>=sizeof(code)) {
588 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
589 exit(EXIT_FAILURE);
590 }
591 strcpy(code,str);
592 }
593 }
594
595 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
596 static char valcode[12];
597 sprintf(valcode,"DENIED/%s",code);
598 log_entry.HttpCode=valcode;
599 } else {
600 log_entry.HttpCode=code;
601 }
602 getword_start(&gwarea,data);
603 if (getword_atoll(&iyear,&gwarea,'-')<0){
604 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
605 exit(EXIT_FAILURE);
606 }
607 if (getword_atoll(&imonth,&gwarea,'-')<0){
608 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
609 exit(EXIT_FAILURE);
610 }
611 if (getword_atoll(&iday,&gwarea,'\0')<0){
612 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
613 exit(EXIT_FAILURE);
614 }
615
616 idata=builddia(iday,imonth,iyear);
617 computedate(iyear,imonth,iday,&tt);
618 if (isa_cols[ISACOL_Time]>=0) {
619 if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 ||
620 tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) {
621 debuga(_("Invalid time found in %s\n"),arq);
622 exit(EXIT_FAILURE);
623 }
624 }
625 log_entry.EntryTime=&tt;
626 }
627 if (log_entry.EntryTime==NULL) {
628 debuga(_("Unknown input log file format: no time\n"));
629 break;
630 }
631 if (log_entry.Ip==NULL) {
632 debuga(_("Unknown input log file format: no IP addresses\n"));
633 break;
634 }
635 if (log_entry.User==NULL) {
636 debuga(_("Unknown input log file format: no user\n"));
637 break;
638 }
639 if (log_entry.Url==NULL) {
640 debuga(_("Unknown input log file format: no URL\n"));
641 break;
642 }
643
644 if(debugm)
645 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil);
646
647 if(Filter->DateRange[0] != '\0'){
648 if(idata < dfrom || idata > duntil) continue;
649 }
650
651 // Record only hours usage which is required
652 if( bsearch( &( log_entry.EntryTime->tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL )
653 continue;
654
655 if( bsearch( &( log_entry.EntryTime->tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL )
656 continue;
657
658
659 if(strlen(log_entry.User) > MAX_USER_LEN) {
660 if (debugm) printf(_("User ID too long: %s\n"),log_entry.User);
661 totregsx++;
662 continue;
663 }
664
665 // include_users
666 if(IncludeUsers[0] != '\0') {
667 snprintf(val1,sizeof(val1),":%s:",log_entry.User);
668 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
669 continue;
670 }
671
672 if(vercode(log_entry.HttpCode)) {
673 if (debugm) printf(_("Excluded code: %s\n"),log_entry.HttpCode);
674 totregsx++;
675 continue;
676 }
677
678 if(testvaliduserchar(log_entry.User))
679 continue;
680
681 // replace any tab by a single space
682 for (str=log_entry.Url ; *str ; str++)
683 if (*str=='\t') *str=' ';
684 for (str=log_entry.HttpCode ; *str ; str++)
685 if (*str=='\t') *str=' ';
686
687 if(ilf!=ILF_Sarg) {
688 /*
689 The full URL is not saved in sarg log. There is no point in testing the URL to detect
690 a downloaded file.
691 */
692 download_flag=is_download_suffix(log_entry.Url);
693 if (download_flag) {
694 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
695 download_count++;
696 }
697 } else
698 download_flag=false;
699
700 url=process_url(log_entry.Url,LongUrl);
701 if (!url || url[0] == '\0') continue;
702
703 if(addr[0] != '\0'){
704 if(strcmp(addr,log_entry.Ip)!=0) continue;
705 }
706 if(Filter->HostFilter) {
707 if(!vhexclude(url)) {
708 if (debugm) printf(_("Excluded site: %s\n"),url);
709 totregsx++;
710 continue;
711 }
712 }
713
714 if(Filter->StartTime >= 0 && Filter->EndTime >= 0) {
715 hmr=log_entry.EntryTime->tm_hour*100+log_entry.EntryTime->tm_min;
716 if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue;
717 }
718
719 if(site[0] != '\0'){
720 if(strstr(url,site)==0) continue;
721 }
722
723 if(UserIp) {
724 log_entry.User=log_entry.Ip;
725 id_is_ip=true;
726 } else {
727 id_is_ip=false;
728 if(strcmp(log_entry.User,"-") == 0 || strcmp(log_entry.User," ") == 0 || strcmp(log_entry.User,"") == 0) {
729 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
730 log_entry.User=log_entry.Ip;
731 id_is_ip=true;
732 }
733 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
734 continue;
735 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
736 log_entry.User="everybody";
737 } else {
738 strlow(log_entry.User);
739 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
740 if ((str=strchr(user,'+'))!=NULL || (str=strchr(user,'\\'))!=NULL || (str=strchr(user,'_'))!=NULL) {
741 log_entry.User=str+1;
742 }
743 }
744 }
745 }
746
747 if(us[0] != '\0'){
748 if(strcmp(log_entry.User,us)!=0) continue;
749 }
750
751 if(Filter->SysUsers) {
752 snprintf(wuser,sizeof(wuser),":%s:",log_entry.User);
753 if(strstr(userfile, wuser) == 0)
754 continue;
755 }
756
757 if(Filter->UserFilter) {
758 if(!vuexclude(log_entry.User)) {
759 if (debugm) printf(_("Excluded user: %s\n"),log_entry.User);
760 totregsx++;
761 continue;
762 }
763 }
764
765 if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 ||
766 strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0)
767 continue;
768
769 if (log_entry.DataSize<0) log_entry.DataSize=0;
770
771 if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
772 if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
773 log_entry.ElapsedTime=0;
774 }
775
776 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
777 fixendofline(str);
778 snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
779 } else strcpy(smartfilter,"\"\"");
780
781 nopen=0;
782 prev_ufile=NULL;
783 for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
784 prev_ufile=ufile;
785 if (ufile->file) nopen++;
786 }
787 if (!ufile) {
788 ufile=malloc(sizeof(*ufile));
789 if (!ufile) {
790 debuga(_("Not enough memory to store the user %s\n"),log_entry.User);
791 exit(EXIT_FAILURE);
792 }
793 memset(ufile,0,sizeof(*ufile));
794 ufile->next=first_user_file;
795 first_user_file=ufile;
796 uinfo=userinfo_create(log_entry.User);
797 ufile->user=uinfo;
798 uinfo->id_is_ip=id_is_ip;
799 } else {
800 if (prev_ufile) {
801 prev_ufile->next=ufile->next;
802 ufile->next=first_user_file;
803 first_user_file=ufile;
804 }
805 }
806 #ifdef ENABLE_DOUBLE_CHECK_DATA
807 ufile->user->nbytes+=log_entry.DataSize;
808 ufile->user->elap+=log_entry.ElapsedTime;
809 #endif
810
811 if (ufile->file==NULL) {
812 if (nopen>=maxopenfiles) {
813 x=0;
814 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
815 if (ufile1->file!=NULL) {
816 if (x>=maxopenfiles) {
817 if (fclose(ufile1->file)==EOF) {
818 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
819 exit(EXIT_FAILURE);
820 }
821 ufile1->file=NULL;
822 }
823 x++;
824 }
825 }
826 }
827 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
828 debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
829 exit(EXIT_FAILURE);
830 }
831 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
832 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
833 exit (1);
834 }
835 }
836
837 strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime);
838 strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime);
839
840 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
841 debuga(_("Write error in the log file of user %s\n"),log_entry.User);
842 exit(EXIT_FAILURE);
843 }
844
845 if(fp_log && ilf!=ILF_Sarg)
846 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
847
848 totregsg++;
849
850 if(!dataonly && download_flag && strstr(log_entry.HttpCode,"DENIED") == 0) {
851 ndownload = 1;
852
853 if ( ! fp_Download_Unsort ) {
854 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
855 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
856 exit (1);
857 }
858 }
859 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url);
860 }
861
862 denied_write(&log_entry);
863 if((ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
864 if(fp_authfail && (strstr(log_entry.HttpCode,"DENIED/401") != 0 || strstr(log_entry.HttpCode,"DENIED/407") != 0)) {
865 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,log_entry.Url);
866 authfail_count++;
867 }
868 }
869
870 if (ilf!=ILF_Sarg) {
871 if(!totper || idata<mindate){
872 mindate=idata;
873 memcpy(&period.start,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
874 strcpy(start_hour,tbuf2);
875 }
876 if (!totper || idata>maxdate) {
877 maxdate=idata;
878 memcpy(&period.end,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
879 }
880 totper=true;
881 }
882
883 if(debugm){
884 printf("IP=\t%s\n",log_entry.Ip);
885 printf("USER=\t%s\n",log_entry.User);
886 printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
887 printf("DATE=\t%s\n",dia);
888 printf("TIME=\t%s\n",hora);
889 printf("FUNC=\t%s\n",fun);
890 printf("URL=\t%s\n",url);
891 printf("CODE=\t%s\n",log_entry.HttpCode);
892 printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
893 }
894 }
895
896 if (!from_stdin) {
897 if (from_pipe)
898 pclose(fp_in);
899 else {
900 fclose(fp_in);
901 if( ShowReadStatistics )
902 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
903 }
904 }
905 }
906 longline_destroy(&line);
907
908 if(fp_log != NULL) {
909 char end_hour[128];
910 char val2[40];
911 char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below
912
913 fclose(fp_log);
914 safe_strcpy(end_hour,tbuf2,sizeof(end_hour));
915 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
916 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
917 if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) {
918 debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour);
919 exit(EXIT_FAILURE);
920 }
921 if (rename(arq_log,val4)) {
922 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
923 } else {
924 strcpy(arq_log,val4);
925
926 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
927 /*
928 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
929 necessary around the command name, put them in the configuration file.
930 */
931 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) {
932 debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log);
933 exit(EXIT_FAILURE);
934 }
935 cstatus=system(val1);
936 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
937 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
938 debuga(_("command: %s\n"),val1);
939 exit(EXIT_FAILURE);
940 }
941 }
942 }
943 if(debug)
944 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
945 }
946
947 denied_close();
948 if (fp_authfail) fclose(fp_authfail);
949 if (fp_Download_Unsort) fclose (fp_Download_Unsort);
950
951 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
952 ufile1=ufile->next;
953 if (ufile->file!=NULL) fclose(ufile->file);
954 free(ufile);
955 }
956
957 if (debug) {
958 int totalcount=0;
959
960 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
961
962 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
963
964 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
965 debuga(_("Log with mixed records format (squid and common log)\n"));
966
967 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
968 debuga(_("Common log format\n"));
969
970 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
971 debuga(_("Squid log format\n"));
972
973 if(ilf_count[ILF_Sarg]>0)
974 debuga(_("Sarg log format\n"));
975
976 if(totalcount==0 && totregsg)
977 debuga(_("Log with invalid format\n"));
978 }
979
980 if(debugz){
981 debugaz(_("date=%s\n"),dia);
982 debugaz(_("period=%s\n"),period.text);
983 }
984
985 return((totregsg!=0) ? 1 : 0);
986 }