git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Index created using an internal sort algorithm instead of a system call
[thirdparty/sarg.git] / log.c
1 /*
2 * AUTHOR: Pedro Lineu Orso pedro.orso@gmail.com
3 * 1998, 2010
4 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
5 *
6 * SARG donations:
7 * please look at http://sarg.sourceforge.net/donations.php
8 * ---------------------------------------------------------------------
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
23 *
24 */
25
26 #include "include/conf.h"
27 #include "include/defs.h"
28
29 #define REPORT_EVERY_X_LINES 5000 /* main() reprints its "Records in file ... reading: NN%" progress line once per this many lines read */
30
31
32 char *userfile; /* NOTE(review): not referenced in the visible part of main(); presumably filled by getusers() - confirm */
33 char *excludefile; /* NOTE(review): not referenced in the visible part of main(); presumably filled by gethexclude() - confirm */
34 char *excludeuser; /* main() searches this for "indexonly" once getuexclude() has been called; presumably filled by that helper - confirm */
35
36 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; /* tm_wday values main() accepts (others are skipped); default is all 7 days; looked up with bsearch(), so keep sorted per compar() */
37 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
38 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; /* tm_hour values main() accepts (others are skipped); default is all 24 hours; same bsearch() ordering requirement */
39
40 static void getusers(const char *pwdfile, int debug); /* main() calls this with PasswdFile when that file is readable */
41 static void gethexclude(const char *hexfile, int debug); /* main() calls this with the exclude-hosts file: the -c argument, else ExcludeHosts */
42 static void getuexclude(const char *uexfile, int debug); /* main() calls this with ExcludeUsers when that file is readable */
43
44 int main(int argc,char *argv[])
45 {
46 enum isa_col_id {
47 ISACOL_Ip,
48 ISACOL_UserName,
49 ISACOL_Date,
50 ISACOL_Time,
51 ISACOL_TimeTaken,
52 ISACOL_Bytes,
53 ISACOL_Uri,
54 ISACOL_Status,
55 ISACOL_Last //last entry of the list !
56 };
57 enum InputLogFormat {
58 ILF_Unknown,
59 ILF_Squid,
60 ILF_Common,
61 ILF_Sarg,
62 ILF_Isa,
63 ILF_Last //last entry of the list !
64 };
65
66 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
67
68 char sz_Download_Unsort[ 20000 ] ;
69 FILE * fp_Download_Unsort = NULL ;
70 FILE * fp_Write_User = NULL ;
71
72 extern int optind;
73 extern int optopt;
74 extern char *optarg;
75
76 char data[255];
77 char elap[255];
78 char ip[MAXLEN];
79 char msg[MAXLEN];
80 char tam[255];
81 char fun[MAXLEN];
82 char wuser[MAXLEN];
83 char smartfilter[MAXLEN];
84 char dia[128];
85 char wdata[128];
86 char mes[30];
87 char ano[30];
88 char hora[30];
89 char wtemp[MAXLEN];
90 char wtemp2[255];
91 char date[255];
92 char arq[255];
93 char arq_log[255];
94 char hm[15], hmf[15], hmr[15];
95 int chm=0;
96 char uagent[MAXLEN];
97 char hexclude[MAXLEN];
98 char csort[MAXLEN];
99 int cstatus;
100 char tbuf[128];
101 char tbuf2[128];
102 char zip[20];
103 char *str;
104 char bufz[MAXLEN];
105 char bufy[MAXLEN];
106 char tmp2[MAXLEN];
107 enum InputLogFormat ilf;
108 int ilf_count[ILF_Last];
109 int ch;
110 int x, l;
111 int errflg=0;
112 int puser=0;
113 int fhost=0;
114 int dns=0;
115 int fuser=0;
116 int idata=0;
117 int mindate=0;
118 int iarq=0;
119 int exstring=0;
120 int isa_ncols=0,isa_cols[ISACOL_Last];
121 int from_stdin;
122 long totregsl=0;
123 long totregsg=0;
124 long totregsx=0;
125 long totper=0;
126 long int max_elapsed=0;
127 time_t tt;
128 struct tm *t;
129 unsigned long recs1=0UL;
130 unsigned long recs2=0UL;
131 int OutputNonZero = REPORT_EVERY_X_LINES ;
132 int download_flag;
133 char download_url[MAXLEN];
134 char sz_Last_User[MAXLEN]="";
135 struct getwordstruct gwarea;
136
137 BgImage[0]='\0';
138 LogoImage[0]='\0';
139 LogoText[0]='\0';
140 PasswdFile[0]='\0';
141 OutputEmail[0]='\0';
142 UserAgentLog[0]='\0';
143 ExcludeHosts[0]='\0';
144 ExcludeUsers[0]='\0';
145 ConfigFile[0]='\0';
146 code[0]='\0';
147 LastLog[0]='\0';
148 ReportType[0]='\0';
149 UserTabFile[0]='\0';
150 BlockIt[0]='\0';
151 ExternalCSSFile[0]='\0';
152 SquidGuardLogFormat[0]='\0';
153 SquidGuardLogAlternate[0]='\0';
154 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
155
156 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
157 strcpy(GraphDaysBytesBarColor,"orange");
158 strcpy(BgColor,"#ffffff");
159 strcpy(TxColor,"#000000");
160 strcpy(TxBgColor,"lavender");
161 strcpy(TiColor,"darkblue");
162 strcpy(Width,"80");
163 strcpy(Height,"45");
164 strcpy(LogoTextColor,"#000000");
165 strcpy(HeaderColor,"darkblue");
166 strcpy(HeaderBgColor,"#dddddd");
167 strcpy(LogoTextColor,"#006699");
168 strcpy(FontSize,"9px");
169 strcpy(TempDir,"/tmp");
170 strcpy(OutputDir,"/var/www/html/squid-reports");
171 Ip2Name=0;
172 strcpy(DateFormat,"u");
173 OverwriteReport=0;
174 RemoveTempFiles=1;
175 strcpy(ReplaceIndex,"index.html");
176 strcpy(Index,"yes");
177 strcpy(RecordsWithoutUser,"ip");
178 UseComma=0;
179 strcpy(MailUtility,"mailx");
180 strcpy(TopSitesNum,"100");
181 UserIp=0;
182 strcpy(TopuserSortField,"BYTES");
183 strcpy(UserSortField,"BYTES");
184 strcpy(TopuserSortOrder,"reverse");
185 strcpy(UserSortOrder,"reverse");
186 strcpy(TopsitesSortField,"CONNECT");
187 strcpy(TopsitesSortType,"D");
188 LongUrl=0;
189 strcpy(language,"English");
190 strcpy(FontFace,"Verdana,Tahoma,Arial");
191 strcpy(datetimeby,"elap");
192 strcpy(CharSet,"ISO-8859-1");
193 Privacy=0;
194 strcpy(PrivacyString,"***.***.***.***");
195 strcpy(PrivacyStringColor,"blue");
196 SuccessfulMsg=1;
197 strcpy(TopUserFields,"NUM DATE_TIME USERID CONNECT BYTES %BYTES IN-CACHE-OUT USED_TIME MILISEC %TIME TOTAL AVERAGE");
198 strcpy(UserReportFields,"CONNECT BYTES %BYTES IN-CACHE-OUT USED_TIME MILISEC %TIME TOTAL AVERAGE");
199 strcpy(DataFileDelimiter,";");
200 strcpy(DataFileFields,"user;date;time;url;connect;bytes;in_cache;out_cache;elapsed");
201 strcpy(SiteUserTimeDateType,"table");
202 ShowReadStatistics=1;
203 strcpy(IndexSortOrder,"D");
204 ShowSargInfo=1;
205 ShowSargLogo=1;
206 strcpy(ParsedOutputLog,"no");
207 strcpy(ParsedOutputLogCompress,"/bin/gzip");
208 strcpy(DisplayedValues,"abbreviation");
209 strcpy(HeaderFontSize,"9px");
210 strcpy(TitleFontSize,"11px");
211 strcpy(AuthUserFile,"/usr/local/sarg/passwd");
212 strcpy(AuthName,"SARG, Restricted Access");
213 strcpy(AuthType,"basic");
214 strcpy(Require,"require user admin %u");
215 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
216 Graphs=1;
217 strcpy(Ulimit,"20000");
218 strcpy(NtlmUserFormat,"domainname+username");
219 strcpy(IndexTree,"file");
220 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
221 strcpy(RealtimeUnauthRec,"show");
222 SquidguardIgnoreDate=0;
223 DansguardianIgnoreDate=0;
224 strcpy(DataFileUrl,"ip");
225 strcpy(MaxElapsed,"28800000");
226 BytesInSitesUsersReport=0;
227 UserAuthentication=0;
228
229 dia[0]='\0';
230 mes[0]='\0';
231 ano[0]='\0';
232 hora[0]='\0';
233 tmp[0]='\0';
234 tmp2[0]='\0';
235 tmp3[0]='\0';
236 wtemp[0]='\0';
237 wtemp2[0]='\0';
238 us[0]='\0';
239 date[0]='\0';
240 df[0]='\0';
241 uagent[0]='\0';
242 hexclude[0]='\0';
243 addr[0]='\0';
244 hm[0]='\0';
245 hmf[0]='\0';
246 site[0]='\0';
247 outdir[0]='\0';
248 elap[0]='\0';
249 email[0]='\0';
250 zip[0]='\0';
251 UserInvalidChar[0]='\0';
252 DataFile[0]='\0';
253 SquidGuardConf[0]='\0';
254 DansGuardianConf[0]='\0';
255
256 denied_count=0;
257 download_count=0;
258 authfail_count=0;
259 dansguardian_count=0;
260 squidguard_count=0;
261 DeniedReportLimit=10;
262 AuthfailReportLimit=10;
263 DansGuardianReportLimit=10;
264 SquidGuardReportLimit=10;
265 DownloadReportLimit=50;
266 UserReportLimit=0;
267 debug=0;
268 debugz=0;
269 debugm=0;
270 iprel=0;
271 userip=0;
272 color1=0;
273 color2=0;
274 color3=0;
275 dotinuser=0;
276 realt=0;
277 realtime_refresh=3;
278 realtime_access_log_lines=1000;
279 cost=0.01;
280 nocost=50000000;
281 ndownload=0;
282 squid24=0;
283
284 bzero(IncludeUsers, MAXLEN);
285 bzero(ExcludeString, MAXLEN);
286
287 #ifdef HAVE_LOCALE_H
288 setlocale(LC_TIME,"");
289 #endif
290
291 NAccessLog=0;
292 for(x=0; x<=MAXLOGS; x++)
293 AccessLog[x][0]='\0';
294 AccessLogFromCmdLine=0;
295
296 language_load(language);
297 strcpy(Title,text[88]);
298
299 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
300 switch(ch)
301 {
302 case 'a':
303 strcpy(addr,optarg);
304 break;
305 case 'b':
306 strcpy(uagent,optarg);
307 break;
308 case 'c':
309 strcpy(hexclude,optarg);
310 break;
311 case 'd':
312 strncpy(date,optarg,sizeof(date)-1);
313 date[sizeof(date)-1]='\0';
314 getword_start(&gwarea,optarg);
315 if (getword(cdfrom,sizeof(cdfrom),&gwarea,'-')<0 || getword(cduntil,sizeof(cduntil),&gwarea,0)<0) {
316 printf("SARG: Maybe you have a broken record or garbage in your date range.\n");
317 exit(1);
318 }
319 date_from(date, cdfrom, cduntil);
320 dfrom=atoi(cdfrom);
321 duntil=atoi(cduntil);
322 break;
323 case 'e':
324 strcpy(email,optarg);
325 break;
326 case 'f':
327 strcpy(ConfigFile,optarg);
328 break;
329 case 'g':
330 strcpy(df,optarg);
331 break;
332 case 'h':
333 usage(argv[0]);
334 exit(0);
335 break;
336 case 'i':
337 iprel++;
338 break;
339 case 'l':
340 if (NAccessLog>=MAXLOGS) {
341 printf("SARG: Too many log files.\n");
342 exit(1);
343 }
344 strcpy(AccessLog[NAccessLog],optarg);
345 NAccessLog++;
346 AccessLogFromCmdLine++;
347 break;
348 case 'L':
349 strcpy(SquidGuardLogAlternate,optarg);
350 break;
351 case 'm':
352 debugm++;
353 break;
354 case 'n':
355 dns++;
356 break;
357 case 'o':
358 strcpy(outdir,optarg);
359 break;
360 case 'p':
361 userip++;
362 break;
363 case 'r':
364 realt++;
365 break;
366 case 's':
367 strcpy(site,optarg);
368 break;
369 case 't':
370 {
371 int h,m;
372
373 if(strstr(optarg,"-") == 0) {
374 strcpy(hm,optarg);
375 strcpy(hmf,optarg);
376 } else {
377 getword_start(&gwarea,optarg);
378 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,0)<0) {
379 fprintf(stderr,"SARG: Maybe you have a broken record or garbage in your time range.\n");
380 exit(1);
381 }
382 }
383 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
384 fprintf(stderr,"SARG: time period must be MM or MM:SS. Exit.\n");
385 exit(1);
386 }
387 sprintf(hm,"%02d%02d",h,m);
388 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
389 fprintf(stderr,"SARG: time period must be MM or MM:SS. Exit.\n");
390 exit(1);
391 }
392 sprintf(hmf,"%02d%02d",h,m);
393 break;
394 }
395 case 'u':
396 strcpy(us,optarg);
397 break;
398 case 'v':
399 version();
400 break;
401 case 'w':
402 strcpy(tmp,optarg);
403 break;
404 case 'x':
405 debug++;
406 break;
407 case 'y':
408 langcode++;
409 break;
410 case 'z':
411 debugz++;
412 break;
413 case ':':
414 fprintf(stderr, "Option -%c require an argument\n",optopt);
415 errflg++;
416 break;
417 case '?':
418 usage(argv[0]);
419 exit(1);
420 break;
421 }
422
423 }
424
425 if (errflg) {
426 usage(argv[0]);
427 exit(2);
428 }
429
430 if(debug) debuga("Init");
431
432 if(ConfigFile[0] == '\0') sprintf(ConfigFile,"%s/sarg.conf",SYSCONFDIR);
433 if(access(ConfigFile, R_OK) != 0) {
434 debuga("Cannot open config file: %s - %s",ConfigFile,strerror(errno));
435 exit(1);
436 }
437
438 if(access(ConfigFile, R_OK) == 0)
439 getconf();
440
441 if(realt) {
442 realtime();
443 exit(0);
444 }
445
446 if(strcmp(IndexTree,"file") == 0)
447 strcpy(ImageFile,"../images");
448 else
449 strcpy(ImageFile,"../../../images");
450
451 dataonly=0;
452 if(DataFile[0] != '\0')
453 dataonly++;
454
455 subs(TopUserFields,sizeof(TopUserFields),"%BYTES","SETYB");
456
457 subs(UserReportFields,sizeof(UserReportFields),"%BYTES","SETYB");
458
459 if(!NAccessLog) {
460 strcpy(AccessLog[0],"/var/log/squid/access.log");
461 NAccessLog++;
462 }
463
464 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
465 convlog(AccessLog[0], df, dfrom, duntil);
466 exit(0);
467 }
468
469 if(strcmp(site,"plit") == 0) {
470 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
471 exit(0);
472 }
473
474 load_excludecodes(ExcludeCodes);
475
476 if(access(PasswdFile, R_OK) == 0) {
477 getusers(PasswdFile,debug);
478 puser++;
479 }
480
481 if(hexclude[0] == '\0')
482 strcpy(hexclude,ExcludeHosts);
483 if(strlen(hexclude) > 0) {
484 if(access(hexclude, R_OK) != 0) {
485 debuga("Cannot open exclude_hosts file: %s - %s",hexclude,strerror(errno));
486 exit(1);
487 }
488 gethexclude(hexclude,debug);
489 fhost++;
490 }
491
492 if(ReportType[0] == '\0')
493 strcpy(ReportType,"topusers topsites users_sites sites_users date_time denied auth_failures site_user_time_date downloads");
494
495 if(access(ExcludeUsers, R_OK) == 0) {
496 getuexclude(ExcludeUsers,debug);
497 fuser++;
498 }
499
500 indexonly=0;
501 if(fuser) {
502 if(strstr(excludeuser,"indexonly") != 0)
503 indexonly++;
504 }
505 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
506 if(strcmp(Index,"only") == 0) indexonly++;
507
508 if(dns) Ip2Name=1;
509
510 if(UserIp) userip++;
511
512 if(strlen(MaxElapsed)>1) max_elapsed=atol(MaxElapsed);
513
514 if(strlen(outdir)<1) strcpy(outdir,OutputDir);
515 strcat(outdir,"/");
516
517 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
518
519 if(tmp[0] == '\0') strcpy(tmp,TempDir);
520 else strcpy(TempDir,tmp);
521
522 if(df[0] == '\0') strcpy(df,DateFormat);
523 else strcpy(DateFormat,df);
524
525 if(df[0] == '\0') {
526 strcpy(df,"u");
527 strcpy(DateFormat,"u");
528 }
529
530 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
531
532 strcpy(tmp2,tmp);
533
534 if(email[0] != '\0') {
535 sprintf(wtemp2,"%s/sarg",tmp2);
536 my_mkdir(wtemp2);
537 strcat(tmp2,"/sarg");
538 strcpy(outdir,tmp2);
539 strcat(outdir,"/");
540 }
541
542 strcat(tmp2,"/sarg.log");
543
544 sprintf(tmp3,"%s/sarg",tmp);
545 if(access(tmp3, R_OK) == 0) {
546 unlinkdir(tmp3,1);
547 }
548 my_mkdir(tmp3);
549 strcpy(tmp4,tmp3);
550 strcpy(tmp5,tmp3);
551 strcpy(tmp6,tmp3);
552 strcat(tmp4,"/denied.log.unsort");
553 strcat(tmp5,"/denied.log");
554 strcat(tmp6,"/authfail.log.unsort");
555
556 if(debug) {
557 fprintf(stderr, "SARG: %s:\nSARG:\n",text[22]);
558 fprintf(stderr, "SARG: %35s (-a) = %s\n",text[23],addr);
559 fprintf(stderr, "SARG: %35s (-b) = %s\n",text[71],uagent);
560 fprintf(stderr, "SARG: %35s (-c) = %s\n",text[69],hexclude);
561 fprintf(stderr, "SARG: %35s (-d) = %s\n",text[24],date);
562 fprintf(stderr, "SARG: %35s (-e) = %s\n",text[41],email);
563 fprintf(stderr, "SARG: %35s (-f) = %s\n",text[70],ConfigFile);
564 if(strcmp(df,"e") == 0)
565 fprintf(stderr, "SARG: %35s (-g) = %s (dd/mm/yyyy)\n",text[25],text[26]);
566 if(strcmp(df,"u") == 0)
567 fprintf(stderr, "SARG: %35s (-g) = %s (mm/dd/yyyy)\n",text[25],text[27]);
568 if(strcmp(df,"w") == 0)
569 fprintf(stderr, "SARG: %35s (-g) = %s (yyyy/ww)\n",text[25],text[85]);
570 if(iprel)
571 fprintf(stderr, "SARG: %35s (-i) = %s\n",text[28],text[1]);
572 else
573 fprintf(stderr, "SARG: %35s (-i) = %s\n",text[28],text[2]);
574 for (iarq=0 ; iarq<NAccessLog ; iarq++)
575 fprintf(stderr, "SARG: %35s (-l) = %s\n",text[37],AccessLog[iarq]);
576 if(Ip2Name)
577 fprintf(stderr, "SARG: %35s (-n) = %s\n",text[65],text[1]);
578 else
579 fprintf(stderr, "SARG: %35s (-n) = %s\n",text[65],text[2]);
580 fprintf(stderr, "SARG: %35s (-o) = %s\n",text[38],outdir);
581 if(UserIp)
582 fprintf(stderr, "SARG: %35s (-p) = %s\n",text[29],text[1]);
583 else
584 fprintf(stderr, "SARG: %35s (-p) = %s\n",text[29],text[2]);
585 fprintf(stderr, "SARG: %35s (-s) = %s\n",text[30],site);
586 fprintf(stderr, "SARG: %35s (-t) = %s\n",text[31],hm);
587 fprintf(stderr, "SARG: %35s (-u) = %s\n",text[32],us);
588 fprintf(stderr, "SARG: %35s (-w) = %s\n",text[34],tmp);
589 if(debug)
590 fprintf(stderr, "SARG: %35s (-x) = %s\n",text[35],text[1]);
591 else
592 fprintf(stderr, "SARG: %35s (-x) = %s\n",text[35],text[2]);
593 if(debugz)
594 fprintf(stderr, "SARG: %35s (-z) = %s\n",text[36],text[1]);
595 else
596 fprintf(stderr, "SARG: %35s (-z) = %s\n",text[36],text[2]);
597 fprintf(stderr, "SARG:\n");
598 }
599
600 if(debugm) {
601 printf("%s:\nSARG:\n",text[22]);
602 printf("%35s (-a) = %s\n",text[23],addr);
603 printf("%35s (-b) = %s\n",text[71],uagent);
604 printf("%35s (-c) = %s\n",text[69],hexclude);
605 printf("%35s (-d) = %s\n",text[24],date);
606 printf("%35s (-e) = %s\n",text[41],email);
607 printf("%35s (-f) = %s\n",text[70],ConfigFile);
608 if(strcmp(df,"e") == 0)
609 printf("%35s (-g) = %s (dd/mm/yyyy)\n",text[25],text[26]);
610 if(strcmp(df,"u") == 0)
611 printf("%35s (-g) = %s (mm/dd/yyyy)\n",text[25],text[27]);
612 if(strcmp(df,"w") == 0)
613 printf("%35s (-g) = %s (yyyy/ww)\n",text[25],text[85]);
614 if(iprel)
615 printf("%35s (-i) = %s\n",text[28],text[1]);
616 else
617 printf("%35s (-i) = %s\n",text[28],text[2]);
618 for (iarq=0 ; iarq<NAccessLog ; iarq++)
619 printf("%35s (-l) = %s\n",text[37],AccessLog[iarq]);
620 if(Ip2Name)
621 printf("%35s (-n) = %s\n",text[65],text[1]);
622 else
623 printf("%35s (-n) = %s\n",text[65],text[2]);
624 printf("%35s (-o) = %s\n",text[38],outdir);
625 if(UserIp)
626 printf("%35s (-p) = %s\n",text[29],text[1]);
627 else
628 printf("%35s (-p) = %s\n",text[29],text[2]);
629 printf("%35s (-s) = %s\n",text[30],site);
630 printf("%35s (-t) = %s\n",text[31],hm);
631 printf("%35s (-u) = %s\n",text[32],us);
632 printf("%35s (-w) = %s\n",text[34],tmp);
633 if(debug)
634 printf("%35s (-x) = %s\n",text[35],text[1]);
635 else
636 printf("%35s (-x) = %s\n",text[35],text[2]);
637 if(debugz)
638 printf("%35s (-z) = %s\n",text[36],text[1]);
639 else
640 printf("%35s (-z) = %s\n",text[36],text[2]);
641 printf("sarg %s: %s\n",text[73],VERSION);
642 printf("Language=%s\n\n",text[3]);
643 }
644
645 if(debug)
646 debuga("sarg %s: %s",text[73],VERSION);
647
648 #ifdef HAVE_RLIM_T
649 if (Ulimit[0] != '\0') {
650 struct rlimit rl;
651 long l1, l2;
652 int rc=0;
653
654 #if defined(RLIMIT_NOFILE)
655 getrlimit (RLIMIT_NOFILE, &rl);
656 #elif defined(RLIMIT_OFILE)
657 getrlimit (RLIMIT_OFILE, &rl);
658 #else
659 #warning "No rlimit resource for the number of open files"
660 #endif
661 l1 = rl.rlim_cur;
662 l2 = rl.rlim_max;
663
664 rl.rlim_cur = atol(Ulimit);
665 rl.rlim_max = atol(Ulimit);
666 #if defined(RLIMIT_NOFILE)
667 rc=setrlimit (RLIMIT_NOFILE, &rl);
668 #elif defined(RLIMIT_OFILE)
669 rc=setrlimit (RLIMIT_OFILE, &rl);
670 #else
671 #warning "No rlimit resource for the number of open files"
672 #endif
673 if(rc == -1) {
674 debuga("setrlimit error - %s\n",strerror(errno));
675 }
676
677 if(debug)
678 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
679 }
680 #endif
681
682 read_usertab(UserTabFile);
683
684 sprintf ( sz_Download_Unsort , "%s/sarg/download.unsort", tmp);
685
686 if(strstr(ReportType,"denied") != 0) {
687 if((fp_denied=MY_FOPEN(tmp4,"w"))==NULL) {
688 fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[45],tmp4,strerror(errno));
689 exit(1);
690 }
691 }
692
693 if(DataFile[0]=='\0') {
694 if(strstr(ReportType,"denied") != 0 || strstr(ReportType,"auth_failures") != 0) {
695 if((fp_authfail=MY_FOPEN(tmp6,"w"))==NULL) {
696 fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[45],tmp6,strerror(errno));
697 exit(1);
698 }
699 }
700 }
701
702 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
703 strcpy(arq,AccessLog[iarq]);
704
705 strcpy(arqtt,arq);
706
707 if(strcmp(arq,"-")==0) {
708 if(debug)
709 debuga("%s: %s",text[7],"stdin");
710 fp_in=stdin;
711 from_stdin=1;
712 } else {
713 decomp(arq,zip,tmp);
714 if(debug)
715 debuga("%s: %s",text[7],arq);
716 if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
717 fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq,strerror(errno));
718 exit(1);
719 }
720 from_stdin=0;
721 }
722 ilf=ILF_Unknown;
723 // pre-Read the file only if I have to show stats
724 if(ShowReadStatistics && !from_stdin) {
725 rewind(fp_in);
726 recs1=0UL;
727 recs2=0UL;
728
729 while( fgets(bufz,sizeof(bufz),fp_in) != NULL ) recs1++;
730 rewind(fp_in);
731 printf("SARG: Records in file: %lu, reading: %3.2f%%\r",recs1,(float) 0);
732 fflush( stdout ) ;
733 }
734
735 while(fgets(bufz,sizeof(bufz),fp_in)!=NULL) {
736
737 if (ilf==ILF_Unknown) {
738 if(strncmp(bufz,"#Software: Mic",14) == 0) {
739 fixendofline(bufz);
740 debuga("%s: %s",text[143],bufz);
741 ilf=ILF_Isa;
742 ilf_count[ilf]++;
743 continue;
744 }
745
746 if(strncmp(bufz,"*** SARG Log ***",16) == 0) {
747 getword_start(&gwarea,arqtt);
748 if (getword(val2,sizeof(val2),&gwarea,'-')<0 || getword(val2,sizeof(val2),&gwarea,'_')<0 ||
749 getword(val3,sizeof(val3),&gwarea,'-')<0 || getword(val3,sizeof(val3),&gwarea,'_')<0) {
750 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
751 exit(1);
752 }
753 sprintf(period,"%s-%s",val2,val3);
754 ilf=ILF_Sarg;
755 ilf_count[ilf]++;
756 continue;
757 }
758 }
759
760 if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
761 if(access(ParsedOutputLog,R_OK) != 0) {
762 sprintf(csort,"%s",ParsedOutputLog);
763 my_mkdir(csort);
764 }
765 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
766 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
767 fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq_log,strerror(errno));
768 exit(1);
769 }
770 fputs("*** SARG Log ***\n",fp_log);
771 }
772
773 recs2++;
774 if( ShowReadStatistics && !from_stdin && ! --OutputNonZero) {
775 perc = recs2 * 100 ;
776 perc = perc / recs1 ;
777 printf("SARG: Records in file: %lu, reading: %3.2f%%\r",recs1,perc);
778 fflush (stdout);
779 OutputNonZero = REPORT_EVERY_X_LINES ;
780 }
781 if(strlen(bufz) > MAXLEN-1) continue;
782 if(!bufz[0]) continue;
783 if(strstr(bufz,"HTTP/0.0") != 0) continue;
784 if(strstr(bufz,"logfile turned over") != 0) continue;
785 if(bufz[0] == ' ') continue;
786 if(strlen(bufz) < 58) continue;
787
788 // Record only hours usage which is required
789 tt = (time_t) strtoul( bufz, NULL, 10 );
790 t = localtime( &tt );
791
792 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
793 sizeof( int ), compar ) == NULL )
794 continue;
795
796 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
797 sizeof( int ), compar ) == NULL )
798 continue;
799
800 // exclude_string
801 exstring=0;
802 if(ExcludeString[0] != '\0') {
803 strcpy(warea,bufz);
804 getword_start(&gwarea,ExcludeString);
805 while(strchr(gwarea.current,':') != 0) {
806 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
807 printf("SARG: Maybe you have a broken record or garbage in your exclusion string.\n");
808 exit(1);
809 }
810 if((str=(char *) strstr(warea,val1)) != (char *) NULL )
811 exstring++;
812 }
813 if((str=(char *) strstr(warea,gwarea.current)) != (char *) NULL )
814 exstring++;
815 }
816 if(exstring) continue;
817
818 strcpy(bufy,bufz);
819 if ((str = strchr(bufz, '\n')) != NULL)
820 *str = '\0'; /* strip \n */
821
822 totregsl++;
823 if(debugm)
824 printf("BUF=%s\n",bufz);
825
826 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
827 getword_start(&gwarea,bufz);
828 if (getword(data,sizeof(data),&gwarea,' ')<0) {
829 printf("SARG: Maybe you have a broken record or garbage in your access.log file.\n");
830 exit(1);
831 }
832 if((str=(char *) strchr(data, '.')) != (char *) NULL ) {
833 if((str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
834 strcpy(ip,data);
835 strcpy(elap,"0");
836 if(squid24) {
837 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
838 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
839 exit(1);
840 }
841 } else {
842 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
843 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
844 exit(1);
845 }
846 }
847 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
848 getword(fun,sizeof(fun),&gwarea,' ')<0 || getword(url,sizeof(url),&gwarea,' ')<0 ||
849 getword_skip(MAXLEN,&gwarea,' ')<0 || getword(code2,sizeof(code2),&gwarea,' ')<0 ||
850 getword(tam,sizeof(tam),&gwarea,' ')<0) {
851 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
852 exit(1);
853 }
854 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
855 if (getword(code,sizeof(code),&gwarea,' ')<0) {
856 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
857 exit(1);
858 }
859 } else {
860 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
861 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
862 exit(1);
863 }
864 }
865
866 if ((str = strchr(code, ':')) != NULL)
867 *str = '/';
868
869 if(strcmp(tam,"\0") == 0)
870 strcpy(tam,"0");
871
872 ilf=ILF_Common;
873 ilf_count[ilf]++;
874 }
875 }
876
877 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
878 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
879 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
880 exit(1);
881 }
882 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
883 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
884 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
885 exit(1);
886 }
887 if(strlen(elap) < 1) continue;
888 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
889 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
890 exit(1);
891 }
892 if (getword(code,sizeof(code),&gwarea,' ')<0){
893 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
894 exit(1);
895 }
896 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
897 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
898 exit(1);
899 }
900 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
901 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
902 exit(1);
903 }
904 if (getword(url,sizeof(url),&gwarea,' ')<0){
905 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
906 exit(1);
907 }
908 // while (strstr(bufz,"%20") != 0) {
909 // getword(warea,bufz,' ');
910 // strcat(url,warea);
911 // }
912 if (getword(user,sizeof(user),&gwarea,' ')<0){
913 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
914 exit(1);
915 }
916 ilf=ILF_Squid;
917 ilf_count[ilf]++;
918 }
919 }
920 if (ilf==ILF_Sarg) {
921 getword_start(&gwarea,bufz);
922 if (getword(data,sizeof(data),&gwarea,' ')<0){
923 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
924 exit(1);
925 }
926 if (getword(hora,sizeof(hora),&gwarea,' ')<0) {
927 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
928 exit(1);
929 }
930 if (getword(user,sizeof(user),&gwarea,' ')<0) {
931 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
932 exit(1);
933 }
934 if (getword(ip,sizeof(ip),&gwarea,' ')<0) {
935 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
936 exit(1);
937 }
938 if (getword(url,sizeof(url),&gwarea,' ')<0){
939 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
940 exit(1);
941 }
942 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
943 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
944 exit(1);
945 }
946 if (getword(code,sizeof(code),&gwarea,' ')<0){
947 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
948 exit(1);
949 }
950 if (getword(elap,sizeof(elap),&gwarea,' ')<0){
951 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
952 exit(1);
953 }
954 if (getword(smartfilter,sizeof(smartfilter),&gwarea,' ')<0){
955 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
956 exit(1);
957 }
958 }
959 if (ilf==ILF_Isa) {
960 if (bufz[0] == '#') {
961 int ncols,cols[ISACOL_Last];
962
963 fixendofline(bufz);
964 getword_start(&gwarea,bufz);
965 // remove the #Fields: column at the beginning of the line
966 if (getword_skip(1000,&gwarea,' ')<0){
967 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
968 exit(1);
969 }
970 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
971 ncols=0;
972 while(gwarea.current[0] != '\0') {
973 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
974 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
975 exit(1);
976 }
977 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
978 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
979 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
980 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
981 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
982 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
983 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
984 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
985 ncols++;
986 }
987 if (cols[ISACOL_Ip]>=0) {
988 isa_ncols=ncols;
989 for (isa_ncols=0 ; isa_ncols<ncols ; isa_ncols++)
990 isa_cols[isa_ncols]=cols[isa_ncols];
991 }
992 continue;
993 }
994 if (!isa_ncols) continue;
995 getword_start(&gwarea,bufz);
996 for (x=0 ; x<isa_ncols ; x++) {
997 if (getword(val1,sizeof(val1),&gwarea,'\t')<0) {
998 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
999 exit(1);
1000 }
1001 if (x==isa_cols[ISACOL_Ip]) {
1002 if (strlen(val1)>=sizeof(ip)) {
1003 printf("SARG: Maybe you have a broken IP in your %s file.\n",arq);
1004 exit(1);
1005 }
1006 strcpy(ip,val1);
1007 } else if (x==isa_cols[ISACOL_UserName]) {
1008 if (strlen(val1)>=sizeof(user)) {
1009 printf("SARG: Maybe you have a broken user in your %s file.\n",arq);
1010 exit(1);
1011 }
1012 strcpy(user,val1);
1013 } else if (x==isa_cols[ISACOL_Date]) {
1014 if (strlen(val1)>=sizeof(data)) {
1015 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1016 exit(1);
1017 }
1018 strcpy(data,val1);
1019 } else if (x==isa_cols[ISACOL_Time]) {
1020 if (strlen(val1)>=sizeof(hora)) {
1021 printf("SARG: Maybe you have a broken time in your %s file.\n",arq);
1022 exit(1);
1023 }
1024 strcpy(hora,val1);
1025 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1026 if (strlen(val1)>=sizeof(elap)) {
1027 printf("SARG: Maybe you have a broken download duration in your %s file.\n",arq);
1028 exit(1);
1029 }
1030 strcpy(elap,val1);
1031 } else if (x==isa_cols[ISACOL_Bytes]) {
1032 if (strlen(val1)>=sizeof(tam)) {
1033 printf("SARG: Maybe you have a broken download size in your %s file.\n",arq);
1034 exit(1);
1035 }
1036 strcpy(tam,val1);
1037 } else if (x==isa_cols[ISACOL_Uri]) {
1038 if (strlen(val1)>=sizeof(url)) {
1039 printf("SARG: Maybe you have a broken URL in your %s file.\n",arq);
1040 exit(1);
1041 }
1042 strcpy(url,val1);
1043 } else if (x==isa_cols[ISACOL_Status]) {
1044 if (strlen(val1)>=sizeof(code)) {
1045 printf("SARG: Maybe you have a broken access code in your %s file.\n",arq);
1046 exit(1);
1047 }
1048 strcpy(code,val1);
1049 }
1050 }
1051
1052 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1053 sprintf(val1,"DENIED/%s",code);
1054 strcpy(code,val1);
1055 }
1056 getword_start(&gwarea,data);
1057 if (getword(ano,sizeof(ano),&gwarea,'-')<0){
1058 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
1059 exit(1);
1060 }
1061 if (getword(mes,sizeof(mes),&gwarea,'-')<0){
1062 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
1063 exit(1);
1064 }
1065 if (getword(dia,sizeof(dia),&gwarea,'\0')<0){
1066 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
1067 exit(1);
1068 }
1069 conv_month_name(mes);
1070 sprintf(data," %s/%s/%s:%s",dia,mes,ano,hora);
1071 }
1072
1073 if(strlen(user) > 150) {
1074 if (debugm) printf("User too long: %s\n",user);
1075 totregsx++;
1076 continue;
1077 }
1078
1079 // include_users
1080 if(IncludeUsers[0] != '\0') {
1081 sprintf(val1,":%s:",user);
1082 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1083 continue;
1084 }
1085
1086 if(vercode(code)) {
1087 if (debugm) printf("Excluded code: %s\n",code);
1088 totregsx++;
1089 continue;
1090 }
1091
1092 if(testvaliduserchar(user))
1093 continue;
1094
1095 #if 0
1096 if((str = strstr(user,"%20")) != NULL) {
1097 /*
1098 Why is it necessary to truncate the user name at the first space ?
1099
1100 The old code used to truncate the user name at the first % if a %20 was
1101 found anywhere in the string. That means the string could be truncated
1102 at the wrong place if another % occurred before the %20. This new code should
1103 avoid that problem and only truncate at the space. There is no bug
1104 report indicating that anybody noticed this.
1105 */
1106 *str='\0';
1107 }
1108
1109 /*
1110 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1111 found in the user name.
1112 */
1113 while((str = strstr(user,"%5c")) != NULL) {
1114 *str='.';
1115 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1116 }
1117 #endif
1118
1119 for(str=user; *str; str++) {
1120 if(*str=='.') dotinuser++;
1121 if(*str=='?' || *str=='.' || *str==':' || *str=='/' || *str=='\\')
1122 *str='_';
1123 }
1124
1125 strlow(user);
1126 if(strncmp(NtlmUserFormat,"user",4) == 0) {
1127 if((str = strchr(user,'_')) != 0) {
1128 strcpy(warea,str+1);
1129 strcpy(user,warea);
1130 }
1131 if((str = strchr(user,'+')) != 0) {
1132 strcpy(warea,str+1);
1133 strcpy(user,warea);
1134 }
1135 }
1136
1137 if(strstr(ReportType,"denied") != 0)
1138 strcpy(urly,url);
1139
1140 download_flag=is_download_suffix(url);
1141 if (download_flag) {
1142 strcpy(download_url,url);
1143 download_count++;
1144 }
1145
1146 // remove any protocol:// at the beginning of the URL
1147 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1148 int i;
1149
1150 str+=2;
1151 for (i=0 ; str[i] ; i++)
1152 url[i]=str[i];
1153 url[i]='\0';
1154 }
1155
1156 if(!LongUrl) {
1157 char *endofhost=strchr(url,'/');
1158 if (endofhost)
1159 *endofhost='\0';
1160 if(strlen(url) > 512 && (endofhost=strchr(url,'%')) != NULL) {
1161 *endofhost='\0';
1162 }
1163 }
1164
1165 if(ilf==ILF_Squid) {
1166 tt=atoi(data);
1167 t=localtime(&tt);
1168
1169 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1170 sprintf(mes,"%d",t->tm_mon+1);
1171 conv_month_name(mes);
1172 if(strncmp(df,"u",1) == 0)
1173 sprintf(tbuf, "%04d%s%02d", t->tm_year+1900, mes, t->tm_mday);
1174 if(strncmp(df,"e",1) == 0)
1175 sprintf(tbuf, "%02d%s%04d", t->tm_mday, mes, t->tm_year+1900);
1176 if(strncmp(df,"w",1) == 0) {
1177 strcpy(IndexTree,"file");
1178 strftime(tbuf, sizeof(tbuf), "%Y.%U", t);
1179 }
1180
1181 strftime(wdata, sizeof(wdata), "%Y%m%d", t);
1182 idata=atoi(wdata);
1183
1184 if(strncmp(df,"u",1)==0)
1185 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
1186 else
1187 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1188 sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1189 } else if(ilf==ILF_Common || ilf==ILF_Isa) {
1190 getword_start(&gwarea,data+1);
1191 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
1192 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1193 exit(1);
1194 }
1195 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
1196 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1197 exit(1);
1198 }
1199 getword_start(&gwarea,data);
1200 if (getword_multisep(dia,sizeof(dia),&gwarea,'/')<0){
1201 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1202 exit(1);
1203 }
1204 if (getword_multisep(mes,sizeof(mes),&gwarea,'/')<0){
1205 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1206 exit(1);
1207 }
1208 if (getword_multisep(ano,sizeof(ano),&gwarea,'/')<0){
1209 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1210 exit(1);
1211 }
1212
1213 if(strcmp(df,"u") == 0)
1214 snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia);
1215 if(strcmp(df,"e") == 0)
1216 snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano);
1217 builddia(dia,mes,ano,df,wdata);
1218 idata=atoi(wdata);
1219 } else if (ilf==ILF_Sarg) {
1220 getword_start(&gwarea,data);
1221 if (getword_multisep(mes,sizeof(mes),&gwarea,'/')<0){
1222 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1223 exit(1);
1224 }
1225 if (getword_multisep(dia,sizeof(dia),&gwarea,'/')<0){
1226 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1227 exit(1);
1228 }
1229 if (getword_multisep(ano,sizeof(ano),&gwarea,0)<0){
1230 printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
1231 exit(1);
1232 }
1233 }
1234
1235 if(debugm)
1236 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1237
1238 l=1;
1239 if(l){
1240 if(addr[0] != '\0'){
1241 if(strcmp(addr,ip)==0)
1242 l=1;else l=0;
1243 }
1244 if(fhost) {
1245 // l=vhexclude(excludefile,ip);
1246 l=vhexclude(excludefile,url);
1247 if(!l) {
1248 if (debugm) printf("Excluded site: %s\n",url);
1249 totregsx++;
1250 }
1251 }
1252 }
1253
1254 if(l){
1255 if(date[0] != '\0'){
1256 if(idata >= dfrom && idata <= duntil)
1257 l=1;else l=0;
1258 }
1259 }
1260 if(l){
1261 if(hm[0] != '\0') {
1262 bzero(hmr,sizeof(hmr));
1263 chm++;
1264 getword_start(&gwarea,hora);
1265 while(chm) {
1266 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1267 printf("SARG: Maybe you have a broken time in your %s file.\n",arq);
1268 exit(1);
1269 }
1270 strncat(hmr,warea,2);
1271 chm--;
1272 }
1273 strncat(hmr,gwarea.current,2);
1274
1275 if(atoi(hmr) >= atoi(hm) && atoi(hmr) <= atoi(hmf))
1276 l=1;else l=0;
1277 }
1278 }
1279 if(l){
1280 if(site[0] != '\0'){
1281 if(strstr(url,site)!=0)
1282 l=1;else l=0;
1283 }
1284 }
1285
1286 if(userip)
1287 strcpy(user,ip);
1288
1289 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1290 if(strcmp(RecordsWithoutUser,"ip") == 0)
1291 strcpy(user,ip);
1292 if(strcmp(RecordsWithoutUser,"ignore") == 0)
1293 continue;
1294 if(strcmp(RecordsWithoutUser,"everybody") == 0)
1295 strcpy(user,"everybody");
1296 }
1297 if(us[0] != '\0'){
1298 if(strcmp(user,us)==0)
1299 l=1;
1300 else
1301 l=0;
1302 }
1303
1304 if(dotinuser) {
1305 subs(user,sizeof(user),"_",".");
1306 dotinuser=0;
1307 }
1308
1309 if(puser) {
1310 sprintf(wuser,":%s:",user);
1311 if(strstr(userfile, wuser) == 0)
1312 continue;
1313 }
1314
1315 if(l) {
1316 if(fuser) {
1317 l=vuexclude(excludeuser,user);
1318 if(!l) {
1319 if (debugm) printf("Excluded user: %s\n",user);
1320 totregsx++;
1321 }
1322 }
1323 }
1324
1325 if(l) {
1326 if(userip)
1327 fixip(user);
1328 }
1329
1330 if(l && max_elapsed) {
1331 if(atol(elap)>max_elapsed) {
1332 elap[0]='0';
1333 elap[1]='\0';
1334 }
1335 }
1336
1337 if(l) {
1338 if(strcmp(user,"-") !=0 && url[0] != '\0' && strcmp(user," ") !=0 && strcmp(user,"") !=0 && strcmp(user,":") !=0){
1339 if((str=(char *) strstr(bufz, "[SmartFilter:")) != (char *) NULL ) {
1340 fixendofline(str);
1341 sprintf(smartfilter,"\"%s\"",str+1);
1342 } else sprintf(smartfilter,"\"\"");
1343
1344 sprintf(bufz, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1345
1346 if ( strcmp ( user , sz_Last_User ) != 0 ) {
1347 if ( fp_Write_User )
1348 fclose( fp_Write_User ) ;
1349 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1350
1351 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1352 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], text[9], tmp3, strerror(errno));
1353 exit (1);
1354 }
1355 strcpy( sz_Last_User , user ) ;
1356 }
1357 fputs (bufz, fp_Write_User);
1358
1359 if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg)
1360 fputs(bufz,fp_log);
1361
1362 totregsg++;
1363
1364 if(download_flag && strstr(code,"DENIED") == 0) {
1365 ndownload = 1;
1366
1367 if ( ! fp_Download_Unsort ) {
1368 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1369 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], text[9], tmp3, strerror(errno));
1370 exit (1);
1371 }
1372 }
1373 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1374 }
1375
1376 if(strstr(ReportType,"denied") != 0 || strstr(ReportType,"auth_failures") != 0) {
1377 if(strstr(code,"DENIED/403") != 0) {
1378 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1379 denied_count++;
1380 }
1381 if(strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0) {
1382 if(fp_authfail)
1383 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1384 authfail_count++;
1385 }
1386 }
1387
1388 if((!totper || idata<mindate) && ilf!=ILF_Sarg){
1389 totper++;
1390 mindate=idata;
1391 sprintf(period,"%s-",tbuf);
1392 sprintf(per_hour,"%s-",tbuf2);
1393 if(date[0] != '\0')
1394 fixper(tbuf, period, cduntil);
1395 if(debugz){
1396 debugaz("tbuf",tbuf);
1397 debugaz("period",period);
1398 }
1399 }
1400 }
1401
1402 if(debugm){
1403 printf("IP=\t%s\n",ip);
1404 printf("USER=\t%s\n",user);
1405 printf("ELAP=\t%s\n",elap);
1406 printf("DATE=\t%s\n",dia);
1407 printf("TIME=\t%s\n",hora);
1408 printf("FUNC=\t%s\n",fun);
1409 printf("URL=\t%s\n",url);
1410 printf("CODE=\t%s\n",code);
1411 printf("LEN=\t%s\n",tam);
1412 }
1413 }
1414 }
1415 if (!from_stdin) {
1416 fclose(fp_in);
1417 if( ShowReadStatistics )
1418 printf("SARG: Records in file: %lu, reading: %3.2f%%\n",recs1, (float) 100 );
1419 }
1420 }
1421
1422 if ( fp_Download_Unsort )
1423 fclose (fp_Download_Unsort);
1424
1425 if (fp_Write_User)
1426 fclose (fp_Write_User);
1427
1428 if(debug) {
1429 int totalcount=0;
1430
1431 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1432
1433 debuga(" %s: %ld, %s: %ld, %s: %ld",text[10],totregsl,text[11],totregsg,text[68],totregsx);
1434
1435 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1436 debuga("%s",text[12]);
1437
1438 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1439 debuga("%s",text[13]);
1440
1441 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1442 debuga("%s",text[14]);
1443
1444 if(ilf_count[ILF_Sarg]>0)
1445 debuga("%s",text[124]);
1446
1447 if(totalcount==0) {
1448 if(!totregsg) {
1449 fprintf(stderr, "SARG: %s\n",text[16]);
1450 fprintf(stderr, "SARG: %s\n",text[21]);
1451 } else fprintf(stderr, "SARG: %s\n",text[15]);
1452 bzero(msg,sizeof(msg));
1453 if(fp_denied)
1454 fclose(fp_denied);
1455 if(fp_authfail)
1456 fclose(fp_authfail);
1457 free_excludecodes();
1458 if(userfile)
1459 free(userfile);
1460 if(excludefile)
1461 free(excludefile);
1462 if(excludeuser)
1463 free(excludeuser);
1464 free_download();
1465 unlink(tmp4);
1466 unlink(tmp6);
1467 unlink(tmp3);
1468 exit(0);
1469 }
1470 }
1471
1472 if(!totregsg){
1473 fprintf(stderr, "SARG: %s\n",text[16]);
1474 fprintf(stderr, "SARG: %s\n",text[21]);
1475 // fclose(fp_ou);
1476 if(fp_denied)
1477 fclose(fp_denied);
1478 if(fp_authfail)
1479 fclose(fp_authfail);
1480 free_excludecodes();
1481 if(userfile)
1482 free(userfile);
1483 if(excludefile)
1484 free(excludefile);
1485 if(excludeuser)
1486 free(excludeuser);
1487 free_download();
1488 exit(0);
1489 }
1490
1491 if(date[0] == '\0' && ilf_count[ILF_Sarg]==0) {
1492 strcat(period,tbuf);
1493 strcat(per_hour,tbuf2);
1494 }
1495
1496 if(debugz){
1497 debugaz("data",dia);
1498 debugaz("tbuf",tbuf);
1499 debugaz("period",period);
1500 }
1501
1502 if(debug)
1503 debuga("%s: %s",text[17],period);
1504
1505 // fclose(fp_ou);
1506 if(fp_denied)
1507 fclose(fp_denied);
1508 if(fp_authfail)
1509 fclose(fp_authfail);
1510
1511 if(fp_log != NULL) {
1512 fclose(fp_log);
1513 getword_start(&gwarea,period);
1514 if (getword_multisep(val2,sizeof(val2),&gwarea,'-')<0){
1515 printf("SARG: Maybe you have a broken date range definition.\n");
1516 exit(1);
1517 }
1518 if (getword_multisep(val1,sizeof(val1),&gwarea,'\0')<0){
1519 printf("SARG: Maybe you have a broken date range definition.\n");
1520 exit(1);
1521 }
1522 getword_start(&gwarea,per_hour);
1523 if (getword_multisep(val3,sizeof(val3),&gwarea,'-')<0){
1524 printf("SARG: Maybe you have a broken date range definition.\n");
1525 exit(1);
1526 }
1527 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,val3,val1,gwarea.current);
1528 rename(arq_log,val4);
1529 strcpy(arq_log,val4);
1530
1531 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0) {
1532 sprintf(val1,"\"%s\" \"%s\"",ParsedOutputLogCompress,arq_log);
1533 cstatus=system(val1);
1534 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1535 fprintf(stderr, "SARG: command return status %d\n",WEXITSTATUS(cstatus));
1536 fprintf(stderr, "SARG: command: %s\n",val1);
1537 exit(1);
1538 }
1539 }
1540
1541 if(debug)
1542 debuga("%s %s",text[123],arq_log);
1543 }
1544
1545 if(strstr(ReportType,"denied") != 0) {
1546 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,tmp5,tmp4);
1547 cstatus=system(csort);
1548 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1549 fprintf(stderr, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus));
1550 fprintf(stderr, "SARG: sort command: %s\n",csort);
1551 exit(1);
1552 }
1553 unlink(tmp4);
1554 }
1555
1556 sort_users_log(tmp, debug);
1557
1558 if(DataFile[0] != '\0')
1559 data_file(tmp);
1560 else
1561 gerarel();
1562
1563 unlink(tmp2);
1564 if(strstr(ReportType,"denied") != 0)
1565 unlink(tmp5);
1566
1567 if(zip[0] != '\0' && strcmp(zip,"zcat") !=0) {
1568 recomp(arq, zip);
1569 }
1570 // else unlink(arq);
1571
1572 if(strcmp(tmp,"/tmp") != 0) {
1573 unlinkdir(tmp,0);
1574 }
1575
1576 free_excludecodes();
1577 if(userfile)
1578 free(userfile);
1579 if(excludefile)
1580 free(excludefile);
1581 if(excludeuser)
1582 free(excludeuser);
1583 free_download();
1584
1585 if(debug)
1586 debuga("%s",text[21]);
1587
1588 exit(0);
1589
1590 }
1591
1592
1593 static void getusers(const char *pwdfile, int debug)
1594 {
1595
1596 FILE *fp_usr;
1597 char buf[255];
1598 char *str;
1599 long int nreg=0;
1600
1601 if(debug)
1602 debuga("%s: %s",text[60],pwdfile);
1603
1604 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1605 fprintf(stderr, "SARG: (getusers) %s: %s - %s\n",text[45],pwdfile,strerror(errno));
1606 exit(1);
1607 }
1608
1609 fseek(fp_usr, 0, SEEK_END);
1610 nreg = ftell(fp_usr);
1611 if (nreg<0) {
1612 printf("SARG: Cannot get the size of file %s",pwdfile);
1613 exit(1);
1614 }
1615 nreg = nreg+5000;
1616 fseek(fp_usr, 0, SEEK_SET);
1617
1618 if((userfile=(char *) malloc(nreg))==NULL){
1619 fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
1620 exit(1);
1621 }
1622
1623 bzero(userfile,nreg);
1624 strcpy(userfile,":");
1625
1626 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1627 str=strchr(buf,':');
1628 if (!str) {
1629 printf("SARG: You have an invalid user in your %s file.\n",pwdfile);
1630 exit(1);
1631 }
1632 str[1]=0;
1633 strcat(userfile,buf);
1634 }
1635
1636 fclose(fp_usr);
1637
1638 return;
1639 }
1640
1641
1642 static void gethexclude(const char *hexfile, int debug)
1643 {
1644
1645 FILE *fp_ex;
1646 char buf[255];
1647 long int nreg=0;
1648
1649 if(debug)
1650 debuga("%s: %s",text[67],hexfile);
1651
1652 if ((fp_ex = fopen(hexfile, "r")) == NULL) {
1653 fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno));
1654 exit(1);
1655 }
1656
1657 fseek(fp_ex, 0, SEEK_END);
1658 nreg = ftell(fp_ex);
1659 if (nreg<0) {
1660 printf("SARG: Cannot get the size of file %s",hexfile);
1661 exit(1);
1662 }
1663 nreg += 11;
1664 fseek(fp_ex, 0, SEEK_SET);
1665
1666 if((excludefile=(char *) malloc(nreg))==NULL){
1667 fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
1668 exit(1);
1669 }
1670
1671 bzero(excludefile,nreg);
1672
1673 while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
1674 if(strchr(buf,'#') != NULL)
1675 continue;
1676 fixendofline(buf);
1677 strcat(excludefile,buf);
1678 strcat(excludefile," ");
1679 }
1680
1681 strcat(excludefile,"*END* ");
1682
1683 fclose(fp_ex);
1684
1685 return;
1686 }
1687
1688
1689 static void getuexclude(const char *uexfile, int debug)
1690 {
1691
1692 FILE *fp_ex;
1693 char buf[255];
1694 long int nreg=0;
1695
1696 if(debug)
1697 debuga("%s: %s",text[67],uexfile);
1698
1699 if ((fp_ex = fopen(uexfile, "r")) == NULL) {
1700 fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno));
1701 exit(1);
1702 }
1703
1704 fseek(fp_ex, 0, SEEK_END);
1705 nreg = ftell(fp_ex);
1706 if (nreg<0) {
1707 printf("SARG: Cannot get the size of file %s",uexfile);
1708 exit(1);
1709 }
1710 nreg += 11;
1711 fseek(fp_ex, 0, SEEK_SET);
1712
1713 if((excludeuser=(char *) malloc(nreg))==NULL){
1714 fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
1715 exit(1);
1716 }
1717
1718 bzero(excludeuser,nreg);
1719
1720 while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
1721 if(strchr(buf,'#') != NULL)
1722 continue;
1723 fixendofline(buf);
1724 strcat(excludeuser,buf);
1725 strcat(excludeuser," ");
1726 }
1727
1728 strcat(excludeuser,"*END* ");
1729
1730 fclose(fp_ex);
1731
1732 return;
1733 }