]> git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
The date range passed as argument is not restricted to the actual range covered by...
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Number of log lines main() reads between two refreshes of the
 * "Records in file ..., reading: ...%" progress line (the counter
 * OutputNonZero is reloaded with this value after each refresh). */
30 #define REPORT_EVERY_X_LINES 5000
/* Value main() assigns to maxopenfiles at start-up.
 * NOTE(review): presumably the cap on per-user files kept open at the
 * same time -- the code consuming maxopenfiles is outside this view; confirm. */
31 #define MAX_OPEN_USER_FILES 10
32
/*
 * Node of a singly linked list (chained through `next`) that pairs a user
 * record with a stdio stream. main() declares several pointers to this type
 * and starts with first_user_file set to NULL.
 * NOTE(review): presumably `file` is the temporary file currently open for
 * `user` -- the code that fills and walks the list is outside this view; confirm.
 */
33 struct userfilestruct
34 {
35 struct userfilestruct *next; /* following node of the list */
36 struct userinfostruct *user; /* user record this node refers to (type declared elsewhere) */
37 FILE *file; /* stdio stream associated with that user */
38 };
39
/* File-local string pointer, initially NULL; the @null@ annotation marks it
 * as allowed to be NULL for static checkers.
 * NOTE(review): not referenced in the visible part of the file -- presumably
 * filled by getusers(); confirm against its definition. */
40 /*@null@*/static char *userfile=NULL;
41
/* Global list initialised with the seven values 0..6 followed by 7.
 * NOTE(review): numlist is declared elsewhere; the trailing number is
 * presumably the count of valid entries, and the list presumably selects the
 * days of the week taken into account -- confirm. */
42 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
/* Global list initialised with the twenty-four values 0..23 followed by 24.
 * NOTE(review): same layout assumption as for weekdays; presumably the hours
 * of the day taken into account -- confirm. */
43 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
44 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
45
/* Forward declaration of a file-local helper. main() calls it with
 * PasswdFile and the debug level once access() reports that file as
 * readable. The definition is not part of the visible source. */
46 static void getusers(const char *pwdfile, int debug);
47
48 int main(int argc,char *argv[])
49 {
50 enum isa_col_id {
51 ISACOL_Ip,
52 ISACOL_UserName,
53 ISACOL_Date,
54 ISACOL_Time,
55 ISACOL_TimeTaken,
56 ISACOL_Bytes,
57 ISACOL_Uri,
58 ISACOL_Status,
59 ISACOL_Last //last entry of the list !
60 };
61 enum InputLogFormat {
62 ILF_Unknown,
63 ILF_Squid,
64 ILF_Common,
65 ILF_Sarg,
66 ILF_Isa,
67 ILF_Last //last entry of the list !
68 };
69
70 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
71
72 char sz_Download_Unsort[ 20000 ] ;
73 FILE * fp_Download_Unsort = NULL ;
74
75 extern int optind;
76 extern int optopt;
77 extern char *optarg;
78
79 char data[255];
80 char elap[255];
81 char ip[MAXLEN];
82 char tam[255];
83 char fun[MAXLEN];
84 char wuser[MAXLEN];
85 char smartfilter[MAXLEN];
86 char dia[128];
87 char mes[30];
88 char ano[30];
89 char hora[30];
90 char wtemp[MAXLEN];
91 char wtemp2[255];
92 char date[255];
93 char arq[255];
94 char arq_log[255];
95 char hm[15], hmf[15], hmr[15];
96 int chm=0;
97 char uagent[MAXLEN];
98 char hexclude[MAXLEN];
99 char csort[MAXLEN];
100 int cstatus;
101 char tbuf2[128];
102 char zip[20];
103 char *str;
104 char tmp2[MAXLEN];
105 char tmp3[MAXLEN];
106 char denied_unsort[MAXLEN];
107 char denied_sort[MAXLEN];
108 char authfail_unsort[MAXLEN];
109 char start_hour[128];
110 char end_hour[128];
111 char *linebuf;
112 char hostname[512];
113 char *url;
114 char *urly;
115 char user[MAX_USER_LEN];
116 enum InputLogFormat ilf;
117 int ilf_count[ILF_Last];
118 int ch;
119 int x;
120 int errflg=0;
121 int puser=0;
122 bool fhost=false;
123 bool dns=false;
124 bool fuser=false;
125 int idata=0;
126 int mindate=0;
127 int maxdate=0;
128 int iarq=0;
129 int isa_ncols=0,isa_cols[ISACOL_Last];
130 bool from_stdin;
131 int blen;
132 int maxopenfiles;
133 int nopen;
134 bool id_is_ip;
135 long totregsl=0;
136 long totregsg=0;
137 long totregsx=0;
138 bool totper=false;
139 long int max_elapsed=0;
140 long long int iyear, imonth, iday;
141 bool realt;
142 bool userip;
143 struct tm tt;
144 struct tm *t;
145 unsigned long recs1=0UL;
146 unsigned long recs2=0UL;
147 int OutputNonZero = REPORT_EVERY_X_LINES ;
148 bool download_flag=false;
149 char *download_url=NULL;
150 struct getwordstruct gwarea;
151 longline line;
152 time_t tnum;
153 struct userinfostruct *uinfo;
154 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
155
156 #ifdef HAVE_LOCALE_H
157 setlocale(LC_TIME,"");
158 #endif
159
160 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
161 if (!setlocale (LC_ALL, "")) {
162 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
163 exit(EXIT_FAILURE);
164 }
165 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
166 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
167 exit(EXIT_FAILURE);
168 }
169 if (!textdomain (PACKAGE_NAME)) {
170 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
171 exit(EXIT_FAILURE);
172 }
173 #endif //ENABLE_NLS
174
175 BgImage[0]='\0';
176 LogoImage[0]='\0';
177 LogoText[0]='\0';
178 PasswdFile[0]='\0';
179 OutputEmail[0]='\0';
180 UserAgentLog[0]='\0';
181 ExcludeHosts[0]='\0';
182 ExcludeUsers[0]='\0';
183 ConfigFile[0]='\0';
184 code[0]='\0';
185 LastLog=0;
186 ReportType=0UL;
187 UserTabFile[0]='\0';
188 BlockIt[0]='\0';
189 ExternalCSSFile[0]='\0';
190 SquidGuardLogFormat[0]='\0';
191 SquidGuardLogAlternate[0]='\0';
192 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
193
194 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
195 strcpy(GraphDaysBytesBarColor,"orange");
196 strcpy(BgColor,"#ffffff");
197 strcpy(TxColor,"#000000");
198 strcpy(TxBgColor,"lavender");
199 strcpy(TiColor,"darkblue");
200 strcpy(Width,"80");
201 strcpy(Height,"45");
202 strcpy(LogoTextColor,"#000000");
203 strcpy(HeaderColor,"darkblue");
204 strcpy(HeaderBgColor,"#dddddd");
205 strcpy(LogoTextColor,"#006699");
206 strcpy(FontSize,"9px");
207 strcpy(TempDir,"/tmp");
208 strcpy(OutputDir,"/var/www/html/squid-reports");
209 Ip2Name=false;
210 strcpy(DateFormat,"u");
211 OverwriteReport=false;
212 RemoveTempFiles=true;
213 strcpy(ReplaceIndex,"index.html");
214 Index=INDEX_YES;
215 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
216 UseComma=0;
217 strcpy(MailUtility,"mailx");
218 TopSitesNum=100;
219 TopUsersNum=0;
220 UserIp=0;
221 strcpy(TopuserSortField,"BYTES");
222 strcpy(UserSortField,"BYTES");
223 strcpy(TopuserSortOrder,"reverse");
224 strcpy(UserSortOrder,"reverse");
225 strcpy(TopsitesSortField,"CONNECT");
226 strcpy(TopsitesSortType,"D");
227 LongUrl=0;
228 strcpy(FontFace,"Verdana,Tahoma,Arial");
229 strcpy(datetimeby,"elap");
230 strcpy(CharSet,"ISO-8859-1");
231 Privacy=0;
232 strcpy(PrivacyString,"***.***.***.***");
233 strcpy(PrivacyStringColor,"blue");
234 SuccessfulMsg=true;
235 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
236 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
237 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
238 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
239 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
240 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
241 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
242 strcpy(DataFileDelimiter,";");
243 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
244 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
245 ShowReadStatistics=true;
246 strcpy(IndexSortOrder,"D");
247 ShowSargInfo=true;
248 ShowSargLogo=true;
249 strcpy(ParsedOutputLog,"no");
250 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
251 strcpy(DisplayedValues,"abbreviation");
252 strcpy(HeaderFontSize,"9px");
253 strcpy(TitleFontSize,"11px");
254 strcpy(AuthUserTemplateFile,"sarg_htaccess");
255 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
256 Graphs=true;
257 #if defined(FONTDIR)
258 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
259 #else
260 GraphFont[0]='\0';
261 #endif
262 strcpy(Ulimit,"20000");
263 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
264 IndexTree=INDEX_TREE_FILE;
265 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
266 strcpy(RealtimeUnauthRec,"show");
267 SquidguardIgnoreDate=0;
268 DansguardianIgnoreDate=0;
269 DataFileUrl=DATAFILEURL_IP;
270 strcpy(MaxElapsed,"28800000");
271 BytesInSitesUsersReport=0;
272 UserAuthentication=0;
273 strcpy(LDAPHost,"127.0.0.1");
274 LDAPPort=389;
275 LDAPProtocolVersion=3;
276 LDAPBindDN[0]='\0';
277 LDAPBindPW[0]='\0';
278 LDAPBaseSearch[0]='\0';
279 strcpy(LDAPFilterSearch, "uid=%s");
280 strcpy(LDAPTargetAttr, "cn");
281
282 dia[0]='\0';
283 mes[0]='\0';
284 ano[0]='\0';
285 hora[0]='\0';
286 tmp[0]='\0';
287 tmp2[0]='\0';
288 tmp3[0]='\0';
289 wtemp[0]='\0';
290 wtemp2[0]='\0';
291 us[0]='\0';
292 date[0]='\0';
293 df[0]='\0';
294 uagent[0]='\0';
295 hexclude[0]='\0';
296 addr[0]='\0';
297 hm[0]='\0';
298 hmf[0]='\0';
299 site[0]='\0';
300 outdir[0]='\0';
301 elap[0]='\0';
302 email[0]='\0';
303 zip[0]='\0';
304 UserInvalidChar[0]='\0';
305 DataFile[0]='\0';
306 SquidGuardConf[0]='\0';
307 DansGuardianConf[0]='\0';
308 start_hour[0]='\0';
309 end_hour[0]='\0';
310
311 denied_count=0;
312 download_count=0;
313 authfail_count=0;
314 dansguardian_count=0;
315 squidguard_count=0;
316 useragent_count=0;
317 DeniedReportLimit=10;
318 AuthfailReportLimit=10;
319 DansGuardianReportLimit=10;
320 SquidGuardReportLimit=10;
321 DownloadReportLimit=50;
322 UserReportLimit=0;
323 debug=0;
324 debugz=0;
325 debugm=0;
326 iprel=false;
327 userip=false;
328 color1=0;
329 color2=0;
330 color3=0;
331 realt=false;
332 realtime_refresh=3;
333 realtime_access_log_lines=1000;
334 cost=0.01;
335 nocost=50000000;
336 ndownload=0;
337 squid24=false;
338 dfrom=0;
339 duntil=0;
340
341 bzero(IncludeUsers, sizeof(IncludeUsers));
342 bzero(ExcludeString, sizeof(ExcludeString));
343 first_user_file=NULL;
344 memset(&period,0,sizeof(period));
345
346 NAccessLog=0;
347 for(x=0; x<MAXLOGS; x++)
348 AccessLog[x][0]='\0';
349 AccessLogFromCmdLine=0;
350
351 strcpy(Title,_("Squid User Access Report"));
352
353 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
354 switch(ch)
355 {
356 case 'a':
357 strcpy(addr,optarg);
358 break;
359 case 'b':
360 strcpy(uagent,optarg);
361 break;
362 case 'c':
363 strcpy(hexclude,optarg);
364 break;
365 case 'd':
366 strncpy(date,optarg,sizeof(date)-1);
367 date[sizeof(date)-1]='\0';
368 date_from(date, &dfrom, &duntil);
369 break;
370 case 'e':
371 strcpy(email,optarg);
372 break;
373 case 'f':
374 strcpy(ConfigFile,optarg);
375 break;
376 case 'g':
377 strcpy(df,optarg);
378 break;
379 case 'h':
380 usage(argv[0]);
381 exit(EXIT_SUCCESS);
382 case 'i':
383 iprel=true;
384 break;
385 case 'l':
386 if (NAccessLog>=MAXLOGS) {
387 printf(_("SARG: Too many log files passed on command line with option -l.\n"));
388 exit(EXIT_FAILURE);
389 }
390 strcpy(AccessLog[NAccessLog],optarg);
391 NAccessLog++;
392 AccessLogFromCmdLine++;
393 break;
394 case 'L':
395 strcpy(SquidGuardLogAlternate,optarg);
396 break;
397 case 'm':
398 debugm++;
399 break;
400 case 'n':
401 dns=true;
402 break;
403 case 'o':
404 strcpy(outdir,optarg);
405 break;
406 case 'p':
407 userip=true;
408 break;
409 case 'r':
410 realt=true;
411 break;
412 case 's':
413 strcpy(site,optarg);
414 break;
415 case 't':
416 {
417 int h,m;
418
419 if(strstr(optarg,"-") == 0) {
420 strcpy(hm,optarg);
421 strcpy(hmf,optarg);
422 } else {
423 getword_start(&gwarea,optarg);
424 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
425 debuga(_("The time range passed on the command line with option -t is invalid\n"));
426 exit(EXIT_FAILURE);
427 }
428 }
429 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
430 debuga(_("Time period must be MM or MM:SS. Exit\n"));
431 exit(EXIT_FAILURE);
432 }
433 sprintf(hm,"%02d%02d",h,m);
434 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
435 debuga(_("Time period must be MM or MM:SS. Exit\n"));
436 exit(EXIT_FAILURE);
437 }
438 sprintf(hmf,"%02d%02d",h,m);
439 break;
440 }
441 case 'u':
442 strcpy(us,optarg);
443 break;
444 case 'v':
445 version();
446 break;
447 case 'w':
448 strcpy(tmp,optarg);
449 break;
450 case 'x':
451 debug++;
452 break;
453 case 'y':
454 langcode++;
455 break;
456 case 'z':
457 debugz++;
458 break;
459 case ':':
460 debuga(_("Option -%c require an argument\n"),optopt);
461 errflg++;
462 break;
463 case '?':
464 usage(argv[0]);
465 exit(EXIT_FAILURE);
466 }
467
468 }
469
470 if (errflg>0) {
471 usage(argv[0]);
472 exit(2);
473 }
474
475 if(debug) debuga(_("Init\n"));
476
477 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
478 if(access(ConfigFile, R_OK) != 0) {
479 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
480 exit(EXIT_FAILURE);
481 }
482
483 if(access(ConfigFile, R_OK) == 0)
484 getconf();
485
486 if(userip) UserIp=true;
487
488 if(dns) Ip2Name=true;
489
490 if(realt) {
491 realtime();
492 exit(EXIT_SUCCESS);
493 }
494
495 if(IndexTree == INDEX_TREE_FILE)
496 strcpy(ImageFile,"../images");
497 else
498 strcpy(ImageFile,"../../../images");
499
500 dataonly=0;
501 if(DataFile[0] != '\0')
502 dataonly++;
503
504 if(NAccessLog == 0) {
505 strcpy(AccessLog[0],"/var/log/squid/access.log");
506 NAccessLog++;
507 }
508
509 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
510 convlog(AccessLog[0], df, dfrom, duntil);
511 exit(EXIT_SUCCESS);
512 }
513
514 if(strcmp(site,"plit") == 0) {
515 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
516 exit(EXIT_SUCCESS);
517 }
518
519 load_excludecodes(ExcludeCodes);
520
521 if(access(PasswdFile, R_OK) == 0) {
522 getusers(PasswdFile,debug);
523 puser++;
524 }
525
526 if(hexclude[0] == '\0')
527 strcpy(hexclude,ExcludeHosts);
528 if(hexclude[0] != '\0') {
529 gethexclude(hexclude,debug);
530 fhost=true;
531 }
532
533 if(ReportType == 0) {
534 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
535 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
536 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
537 }
538
539 if(access(ExcludeUsers, R_OK) == 0) {
540 getuexclude(ExcludeUsers,debug);
541 fuser=true;
542 }
543
544 indexonly=0;
545 if(fuser) {
546 if(is_indexonly())
547 indexonly++;
548 }
549 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
550 if(Index == INDEX_ONLY) indexonly++;
551
552 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
553
554 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
555 strcat(outdir,"/");
556
557 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
558
559 if(tmp[0] == '\0') strcpy(tmp,TempDir);
560 else strcpy(TempDir,tmp);
561
562 if(df[0] == '\0') strcpy(df,DateFormat);
563 else strcpy(DateFormat,df);
564
565 if(df[0] == '\0') {
566 strcpy(df,"u");
567 strcpy(DateFormat,"u");
568 }
569 if (df[0]=='w')
570 IndexTree=INDEX_TREE_FILE;
571
572 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
573
574 strcpy(tmp2,tmp);
575
576 if(email[0] != '\0') {
577 snprintf(wtemp2,sizeof(wtemp2),"%s/sarg",tmp2);
578 my_mkdir(wtemp2);
579 strcat(tmp2,"/sarg");
580 strcpy(outdir,tmp2);
581 strcat(outdir,"/");
582 }
583
584 strcat(tmp2,"/sarg.log");
585
586 sprintf(tmp3,"%s/sarg",tmp);
587 if(access(tmp3, R_OK) == 0) {
588 unlinkdir(tmp3,1);
589 }
590 my_mkdir(tmp3);
591 strcpy(denied_unsort,tmp3);
592 strcpy(denied_sort,tmp3);
593 strcpy(authfail_unsort,tmp3);
594 strcat(denied_unsort,"/denied.log.unsort");
595 strcat(denied_sort,"/denied.log");
596 strcat(authfail_unsort,"/authfail.log.unsort");
597
598 if(debug) {
599 debuga(_("Parameters:\n"));
600 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
601 debuga(_(" Useragent log (-b) = %s\n"),uagent);
602 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
603 debuga(_(" Date from-until (-d) = %s\n"),date);
604 debuga(_(" Email address to send reports (-e) = %s\n"),email);
605 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
606 if(strcmp(df,"e") == 0)
607 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
608 if(strcmp(df,"u") == 0)
609 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
610 if(strcmp(df,"w") == 0)
611 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
612 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
613 for (iarq=0 ; iarq<NAccessLog ; iarq++)
614 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
615 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
616 debuga(_(" Output dir (-o) = %s\n"),outdir);
617 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
618 debuga(_(" Accessed site (-s) = %s\n"),site);
619 debuga(_(" Time (-t) = %s\n"),hm);
620 debuga(_(" User (-u) = %s\n"),us);
621 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
622 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
623 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
624 debuga("\n");
625 }
626
627 if(debugm) {
628 printf(_("Parameters:\n"));
629 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
630 printf(_(" Useragent log (-b) = %s\n"),uagent);
631 printf(_(" Exclude file (-c) = %s\n"),hexclude);
632 printf(_(" Date from-until (-d) = %s\n"),date);
633 printf(_(" Email address to send reports (-e) = %s\n"),email);
634 printf(_(" Config file (-f) = %s\n"),ConfigFile);
635 if(strcmp(df,"e") == 0)
636 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
637 if(strcmp(df,"u") == 0)
638 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
639 if(strcmp(df,"w") == 0)
640 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
641 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
642 for (iarq=0 ; iarq<NAccessLog ; iarq++)
643 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
644 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
645 printf(_(" Output dir (-o) = %s\n"),outdir);
646 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
647 printf(_(" Accessed site (-s) = %s\n"),site);
648 printf(_(" Time (-t) = %s\n"),hm);
649 printf(_(" User (-u) = %s\n"),us);
650 printf(_(" Temporary dir (-w) = %s\n"),tmp);
651 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
652 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
653 printf(_("sarg version: %s\n"),VERSION);
654 }
655
656 if(debug)
657 debuga(_("sarg version: %s\n"),VERSION);
658
659 maxopenfiles=MAX_OPEN_USER_FILES;
660 #ifdef HAVE_RLIM_T
661 if (Ulimit[0] != '\0') {
662 struct rlimit rl;
663 long l1, l2;
664 int rc=0;
665
666 #if defined(RLIMIT_NOFILE)
667 getrlimit (RLIMIT_NOFILE, &rl);
668 #elif defined(RLIMIT_OFILE)
669 getrlimit (RLIMIT_OFILE, &rl);
670 #else
671 #warning "No rlimit resource for the number of open files"
672 #endif
673 l1 = rl.rlim_cur;
674 l2 = rl.rlim_max;
675
676 rl.rlim_cur = atol(Ulimit);
677 rl.rlim_max = atol(Ulimit);
678 #if defined(RLIMIT_NOFILE)
679 rc=setrlimit (RLIMIT_NOFILE, &rl);
680 #elif defined(RLIMIT_OFILE)
681 rc=setrlimit (RLIMIT_OFILE, &rl);
682 #else
683 #warning "No rlimit resource for the number of open files"
684 #endif
685 if(rc == -1) {
686 debuga(_("setrlimit error - %s\n"),strerror(errno));
687 }
688
689 if(debug)
690 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
691 }
692 #endif
693
694 init_usertab(UserTabFile);
695
696 if ((line=longline_create())==NULL) {
697 debuga(_("Not enough memory to read a log file\n"));
698 exit(EXIT_FAILURE);
699 }
700
701 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/sarg/download.unsort", tmp);
702
703 if(DataFile[0]=='\0') {
704 if((ReportType & REPORT_TYPE_DENIED) != 0) {
705 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
706 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
707 exit(EXIT_FAILURE);
708 }
709 }
710
711 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
712 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
713 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
714 exit(EXIT_FAILURE);
715 }
716 }
717 }
718
719 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
720 strcpy(arq,AccessLog[iarq]);
721
722 strcpy(arqtt,arq);
723
724 if(strcmp(arq,"-")==0) {
725 if(debug)
726 debuga(_("Reading access log file: from stdin\n"));
727 fp_in=stdin;
728 from_stdin=true;
729 } else {
730 decomp(arq,zip,tmp);
731 if(debug)
732 debuga(_("Reading access log file: %s\n"),arq);
733 if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
734 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
735 exit(EXIT_FAILURE);
736 }
737 from_stdin=false;
738 }
739 ilf=ILF_Unknown;
740 download_flag=false;
741 // pre-read the file only if we have to show stats
742 if(ShowReadStatistics && !from_stdin) {
743 size_t nread,i;
744 bool skipcr=false;
745 char tmp4[MAXLEN];
746
747 recs1=0UL;
748 recs2=0UL;
749
750 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
751 for (i=0 ; i<nread ; i++)
752 if (skipcr) {
753 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
754 skipcr=false;
755 }
756 } else {
757 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
758 skipcr=true;
759 recs1++;
760 }
761 }
762 }
763 rewind(fp_in);
764 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
765 putchar('\r');
766 fflush( stdout ) ;
767 }
768
769 longline_reset(line);
770
771 while ((linebuf=longline_read(fp_in,line))!=NULL) {
772 blen=strlen(linebuf);
773
774 if (ilf==ILF_Unknown) {
775 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
776 fixendofline(linebuf);
777 if (debug)
778 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
779 ilf=ILF_Isa;
780 ilf_count[ilf]++;
781 continue;
782 }
783
784 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
785 if (getperiod_fromsarglog(arqtt,&period)<0) {
786 debuga(_("The name of the file is invalid: %s\n"),arq);
787 exit(EXIT_FAILURE);
788 }
789 ilf=ILF_Sarg;
790 ilf_count[ilf]++;
791 continue;
792 }
793 }
794
795 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
796 if(access(ParsedOutputLog,R_OK) != 0) {
797 my_mkdir(ParsedOutputLog);
798 }
799 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
800 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
801 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
802 exit(EXIT_FAILURE);
803 }
804 fputs("*** SARG Log ***\n",fp_log);
805 }
806
807 recs2++;
808 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
809 double perc = recs2 * 100. / recs1 ;
810 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
811 putchar('\r');
812 fflush (stdout);
813 OutputNonZero = REPORT_EVERY_X_LINES ;
814 }
815 if(blen < 58) continue;
816 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
817 if(strstr(linebuf,"logfile turned over") != 0) continue;
818 if(linebuf[0] == ' ') continue;
819
820 // exclude_string
821 if(ExcludeString[0] != '\0') {
822 bool exstring=false;
823 getword_start(&gwarea,ExcludeString);
824 while(strchr(gwarea.current,':') != 0) {
825 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
826 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
827 exit(EXIT_FAILURE);
828 }
829 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
830 exstring=true;
831 break;
832 }
833 }
834 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
835 exstring=true;
836 if(exstring) continue;
837 }
838
839 totregsl++;
840 if(debugm)
841 printf("BUF=%s\n",linebuf);
842
843 t=NULL;
844 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
845 getword_start(&gwarea,linebuf);
846 if (getword(data,sizeof(data),&gwarea,' ')<0) {
847 debuga(_("Maybe you have a broken time in your access.log file\n"));
848 exit(EXIT_FAILURE);
849 }
850 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
851 strcpy(ip,data);
852 strcpy(elap,"0");
853 if(squid24) {
854 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
855 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
856 exit(EXIT_FAILURE);
857 }
858 } else {
859 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
860 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
861 exit(EXIT_FAILURE);
862 }
863 }
864 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
865 getword(fun,sizeof(fun),&gwarea,' ')<0) {
866 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
867 exit(EXIT_FAILURE);
868 }
869 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
870 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
871 exit(EXIT_FAILURE);
872 }
873 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
874 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
875 exit(EXIT_FAILURE);
876 }
877 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
878 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
879 exit(EXIT_FAILURE);
880 }
881 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
882 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
883 exit(EXIT_FAILURE);
884 }
885 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
886 if (getword(code,sizeof(code),&gwarea,' ')<0) {
887 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
888 exit(EXIT_FAILURE);
889 }
890 } else {
891 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
892 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
893 exit(EXIT_FAILURE);
894 }
895 }
896
897 if ((str = strchr(code, ':')) != NULL)
898 *str = '/';
899
900 if(strcmp(tam,"\0") == 0)
901 strcpy(tam,"0");
902
903 ilf=ILF_Common;
904 ilf_count[ilf]++;
905
906 getword_start(&gwarea,data+1);
907 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
908 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
909 exit(EXIT_FAILURE);
910 }
911 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
912 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
913 exit(EXIT_FAILURE);
914 }
915 getword_start(&gwarea,data);
916 if (getword_atoll(&iday,&gwarea,'/')<0){
917 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
918 exit(EXIT_FAILURE);
919 }
920 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
921 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
922 exit(EXIT_FAILURE);
923 }
924 if (getword_atoll(&iyear,&gwarea,'/')<0){
925 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
926 exit(EXIT_FAILURE);
927 }
928
929 imonth=month2num(mes)+1;
930 idata=builddia(iday,imonth,iyear);
931 computedate(iyear,imonth,iday,&tt);
932 t=&tt;
933 }
934
935 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
936 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
937 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
938 exit(EXIT_FAILURE);
939 }
940 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
941 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
942 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
943 exit(EXIT_FAILURE);
944 }
945 if(strlen(elap) < 1) continue;
946 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
947 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
948 exit(EXIT_FAILURE);
949 }
950 if (getword(code,sizeof(code),&gwarea,' ')<0){
951 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
952 exit(EXIT_FAILURE);
953 }
954 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
955 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
956 exit(EXIT_FAILURE);
957 }
958 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
959 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
960 exit(EXIT_FAILURE);
961 }
962 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
963 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
964 exit(EXIT_FAILURE);
965 }
966 if (getword(user,sizeof(user),&gwarea,' ')<0){
967 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
968 exit(EXIT_FAILURE);
969 }
970 ilf=ILF_Squid;
971 ilf_count[ilf]++;
972
973 tnum=atoi(data);
974 t=localtime(&tnum);
975 if (t == NULL) {
976 debuga(_("Cannot convert the timestamp from the squid log file\n"));
977 exit(EXIT_FAILURE);
978 }
979
980 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
981
982 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
983
984 if(strncmp(df,"u",1)==0)
985 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
986 else
987 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
988 sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
989 }
990 }
991 if (ilf==ILF_Sarg) {
992 getword_start(&gwarea,linebuf);
993 if (getword(data,sizeof(data),&gwarea,'\t')<0){
994 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
995 exit(EXIT_FAILURE);
996 }
997 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
998 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
999 exit(EXIT_FAILURE);
1000 }
1001 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1002 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1003 exit(EXIT_FAILURE);
1004 }
1005 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1006 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1007 exit(EXIT_FAILURE);
1008 }
1009 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1010 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1011 exit(EXIT_FAILURE);
1012 }
1013 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1014 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1015 exit(EXIT_FAILURE);
1016 }
1017 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1018 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1019 exit(EXIT_FAILURE);
1020 }
1021 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1022 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1023 exit(EXIT_FAILURE);
1024 }
1025 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1026 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1027 exit(EXIT_FAILURE);
1028 }
1029 getword_start(&gwarea,data);
1030 if(strcmp(df,"u") == 0) {
1031 if (getword_atoll(&imonth,&gwarea,'/')<0){
1032 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1033 exit(EXIT_FAILURE);
1034 }
1035 if (getword_atoll(&iday,&gwarea,'/')<0){
1036 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1037 exit(EXIT_FAILURE);
1038 }
1039 } else {
1040 if (getword_atoll(&iday,&gwarea,'/')<0){
1041 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1042 exit(EXIT_FAILURE);
1043 }
1044 if (getword_atoll(&imonth,&gwarea,'/')<0){
1045 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1046 exit(EXIT_FAILURE);
1047 }
1048 }
1049 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1050 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1051 exit(EXIT_FAILURE);
1052 }
1053 idata=builddia(iday,imonth,iyear);
1054 computedate(iyear,imonth,iday,&tt);
1055 t=&tt;
1056 }
1057 if (ilf==ILF_Isa) {
1058 if (linebuf[0] == '#') {
1059 int ncols,cols[ISACOL_Last];
1060
1061 fixendofline(linebuf);
1062 getword_start(&gwarea,linebuf);
1063 // remove the #Fields: column at the beginning of the line
1064 if (getword_skip(1000,&gwarea,' ')<0){
1065 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1066 exit(EXIT_FAILURE);
1067 }
1068 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1069 ncols=0;
1070 while(gwarea.current[0] != '\0') {
1071 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1072 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1073 exit(EXIT_FAILURE);
1074 }
1075 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1076 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1077 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1078 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1079 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1080 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1081 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1082 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1083 ncols++;
1084 }
1085 if (cols[ISACOL_Ip]>=0) {
1086 isa_ncols=ncols;
1087 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1088 isa_cols[ncols]=cols[ncols];
1089 }
1090 continue;
1091 }
1092 if (!isa_ncols) continue;
1093 getword_start(&gwarea,linebuf);
1094 for (x=0 ; x<isa_ncols ; x++) {
1095 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1096 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1097 exit(EXIT_FAILURE);
1098 }
1099 if (x==isa_cols[ISACOL_Ip]) {
1100 if (strlen(str)>=sizeof(ip)) {
1101 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1102 exit(EXIT_FAILURE);
1103 }
1104 strcpy(ip,str);
1105 } else if (x==isa_cols[ISACOL_UserName]) {
1106 if (strlen(str)>=sizeof(user)) {
1107 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1108 exit(EXIT_FAILURE);
1109 }
1110 strcpy(user,str);
1111 } else if (x==isa_cols[ISACOL_Date]) {
1112 if (strlen(str)>=sizeof(data)) {
1113 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1114 exit(EXIT_FAILURE);
1115 }
1116 strcpy(data,str);
1117 } else if (x==isa_cols[ISACOL_Time]) {
1118 if (strlen(str)>=sizeof(hora)) {
1119 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1120 exit(EXIT_FAILURE);
1121 }
1122 strcpy(hora,str);
1123 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1124 if (strlen(str)>=sizeof(elap)) {
1125 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1126 exit(EXIT_FAILURE);
1127 }
1128 strcpy(elap,str);
1129 } else if (x==isa_cols[ISACOL_Bytes]) {
1130 if (strlen(str)>=sizeof(tam)) {
1131 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1132 exit(EXIT_FAILURE);
1133 }
1134 strcpy(tam,str);
1135 } else if (x==isa_cols[ISACOL_Uri]) {
1136 url=str;
1137 } else if (x==isa_cols[ISACOL_Status]) {
1138 if (strlen(str)>=sizeof(code)) {
1139 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1140 exit(EXIT_FAILURE);
1141 }
1142 strcpy(code,str);
1143 }
1144 }
1145
1146 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1147 sprintf(val1,"DENIED/%s",code);
1148 strcpy(code,val1);
1149 }
1150 getword_start(&gwarea,data);
1151 if (getword_atoll(&iyear,&gwarea,'-')<0){
1152 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1153 exit(EXIT_FAILURE);
1154 }
1155 if (getword_atoll(&imonth,&gwarea,'-')<0){
1156 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1157 exit(EXIT_FAILURE);
1158 }
1159 if (getword_atoll(&iday,&gwarea,'\0')<0){
1160 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1161 exit(EXIT_FAILURE);
1162 }
1163
1164 idata=builddia(iday,imonth,iyear);
1165 computedate(iyear,imonth,iday,&tt);
1166 t=&tt;
1167 }
1168 if (t==NULL) {
1169 debuga(_("Unknown input log file format\n"));
1170 break;
1171 }
1172
1173 if(debugm)
1174 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1175
1176 if(date[0] != '\0'){
1177 if(idata < dfrom || idata > duntil) continue;
1178 }
1179
1180 // Record only hours usage which is required
1181 if (t) {
1182 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1183 sizeof( int ), compar ) == NULL )
1184 continue;
1185
1186 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1187 sizeof( int ), compar ) == NULL )
1188 continue;
1189 }
1190
1191
1192 if(strlen(user) > MAX_USER_LEN) {
1193 if (debugm) printf(_("User ID too long: %s\n"),user);
1194 totregsx++;
1195 continue;
1196 }
1197
1198 // include_users
1199 if(IncludeUsers[0] != '\0') {
1200 sprintf(val1,":%s:",user);
1201 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1202 continue;
1203 }
1204
1205 if(vercode(code)) {
1206 if (debugm) printf(_("Excluded code: %s\n"),code);
1207 totregsx++;
1208 continue;
1209 }
1210
1211 if(testvaliduserchar(user))
1212 continue;
1213
1214 #if 0
1215 if((str = strstr(user,"%20")) != NULL) {
1216 /*
1217 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1218 the side effect is to truncate the name at the first space and merge the reports
1219 of people whose name is identical up to the first space.
1220
1221 The old code used to truncate the user name at the first % if a %20 was
1222 found anywhere in the string. That means the string could be truncated
1223 at the wrong place if another % occured before the %20. This new code should
1224 avoid that problem and only truncate at the space. There is no bug
1225 report indicating that anybody noticed this.
1226 */
1227 *str='\0';
1228 }
1229
1230 /*
1231 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1232 found in the user name.
1233 */
1234 while((str = strstr(user,"%5c")) != NULL) {
1235 *str='.';
1236 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1237 }
1238 #endif
1239
1240 urly=url;
1241
1242 if(ilf!=ILF_Sarg) {
1243 /*
1244 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1245 a downloaded file.
1246 */
1247 download_flag=is_download_suffix(url);
1248 if (download_flag) {
1249 download_url=url;
1250 download_count++;
1251 }
1252 } else
1253 download_flag=false;
1254
1255 // remove any protocol:// at the beginning of the URL
1256 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1257 int i;
1258
1259 str+=2;
1260 for (i=0 ; str[i] ; i++)
1261 url[i]=str[i];
1262 url[i]='\0';
1263 }
1264
1265 if(!LongUrl) {
1266 url_hostname(url,hostname,sizeof(hostname));
1267 url=hostname;
1268 }
1269
1270 if(url[0] == '\0') continue;
1271
1272 if(addr[0] != '\0'){
1273 if(strcmp(addr,ip)!=0) continue;
1274 }
1275 if(fhost) {
1276 if(!vhexclude(url)) {
1277 if (debugm) printf(_("Excluded site: %s\n"),url);
1278 totregsx++;
1279 continue;
1280 }
1281 }
1282
1283 if(hm[0] != '\0') {
1284 hmr[0]='\0';
1285 chm++;
1286 getword_start(&gwarea,hora);
1287 while(chm) {
1288 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1289 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1290 exit(EXIT_FAILURE);
1291 }
1292 strncat(hmr,warea,2);
1293 chm--;
1294 }
1295 strncat(hmr,gwarea.current,2);
1296
1297 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1298 }
1299
1300 if(site[0] != '\0'){
1301 if(strstr(url,site)==0) continue;
1302 }
1303
1304 if(UserIp) {
1305 strcpy(user,ip);
1306 id_is_ip=true;
1307 } else {
1308 id_is_ip=false;
1309 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1310 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1311 strcpy(user,ip);
1312 id_is_ip=true;
1313 }
1314 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1315 continue;
1316 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1317 strcpy(user,"everybody");
1318 } else {
1319 strlow(user);
1320 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1321 if((str = strchr(user,'_')) != 0) {
1322 strcpy(warea,str+1);
1323 strcpy(user,warea);
1324 }
1325 if((str = strchr(user,'+')) != 0) {
1326 strcpy(warea,str+1);
1327 strcpy(user,warea);
1328 }
1329 }
1330 }
1331 }
1332
1333 if(us[0] != '\0'){
1334 if(strcmp(user,us)!=0) continue;
1335 }
1336
1337 if(puser) {
1338 sprintf(wuser,":%s:",user);
1339 if(strstr(userfile, wuser) == 0)
1340 continue;
1341 }
1342
1343 if(fuser) {
1344 if(!vuexclude(user)) {
1345 if (debugm) printf(_("Excluded user: %s\n"),user);
1346 totregsx++;
1347 continue;
1348 }
1349 }
1350
1351 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1352 continue;
1353
1354 if(max_elapsed) {
1355 if(atol(elap)>max_elapsed) {
1356 elap[0]='0';
1357 elap[1]='\0';
1358 }
1359 }
1360
1361 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1362 fixendofline(str);
1363 sprintf(smartfilter,"\"%s\"",str+1);
1364 } else sprintf(smartfilter,"\"\"");
1365
1366 nopen=0;
1367 prev_ufile=NULL;
1368 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1369 prev_ufile=ufile;
1370 if (ufile->file) nopen++;
1371 }
1372 if (!ufile) {
1373 ufile=malloc(sizeof(*ufile));
1374 if (!ufile) {
1375 debuga(_("Not enough memory to store the user %s\n"),user);
1376 exit(EXIT_FAILURE);
1377 }
1378 memset(ufile,0,sizeof(*ufile));
1379 ufile->next=first_user_file;
1380 first_user_file=ufile;
1381 uinfo=userinfo_create(user);
1382 ufile->user=uinfo;
1383 uinfo->id_is_ip=id_is_ip;
1384 } else {
1385 if (prev_ufile) {
1386 prev_ufile->next=ufile->next;
1387 ufile->next=first_user_file;
1388 first_user_file=ufile;
1389 }
1390 }
1391
1392 if (ufile->file==NULL) {
1393 if (nopen>=maxopenfiles) {
1394 x=0;
1395 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1396 if (ufile1->file!=NULL) {
1397 if (x>=maxopenfiles) {
1398 fclose(ufile1->file);
1399 ufile1->file=NULL;
1400 }
1401 x++;
1402 }
1403 }
1404 }
1405 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1406 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1407 exit(EXIT_FAILURE);
1408 }
1409 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1410 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1411 exit (1);
1412 }
1413 }
1414
1415 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1416 if ( fp_Write_User )
1417 fclose( fp_Write_User ) ;
1418 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1419
1420 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1421 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1422 exit (1);
1423 }
1424 strcpy( sz_Last_User , user ) ;
1425 }*/
1426 fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter);
1427
1428 if(fp_log && ilf!=ILF_Sarg)
1429 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1430
1431 totregsg++;
1432
1433 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1434 ndownload = 1;
1435
1436 if ( ! fp_Download_Unsort ) {
1437 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1438 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1439 exit (1);
1440 }
1441 }
1442 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1443 }
1444
1445 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1446 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1447 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1448 denied_count++;
1449 }
1450 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1451 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1452 authfail_count++;
1453 }
1454 }
1455
1456 if (ilf!=ILF_Sarg) {
1457 if(!totper || idata<mindate){
1458 mindate=idata;
1459 memcpy(&period.start,t,sizeof(*t));
1460 strcpy(start_hour,tbuf2);
1461 }
1462 if (!totper || idata>maxdate) {
1463 maxdate=idata;
1464 memcpy(&period.end,t,sizeof(*t));
1465 }
1466 totper=true;
1467 }
1468
1469 if(debugm){
1470 printf("IP=\t%s\n",ip);
1471 printf("USER=\t%s\n",user);
1472 printf("ELAP=\t%s\n",elap);
1473 printf("DATE=\t%s\n",dia);
1474 printf("TIME=\t%s\n",hora);
1475 printf("FUNC=\t%s\n",fun);
1476 printf("URL=\t%s\n",url);
1477 printf("CODE=\t%s\n",code);
1478 printf("LEN=\t%s\n",tam);
1479 }
1480 }
1481 if (!from_stdin) {
1482 fclose(fp_in);
1483 if( ShowReadStatistics )
1484 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1485 }
1486 }
1487
1488 if (debug)
1489 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1490
1491 longline_destroy(&line);
1492 if ( fp_Download_Unsort )
1493 fclose (fp_Download_Unsort);
1494
1495 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1496 ufile1=ufile->next;
1497 if (ufile->file!=NULL) fclose(ufile->file);
1498 free(ufile);
1499 }
1500
1501 free_download();
1502 free_excludecodes();
1503 free_exclude();
1504
1505 if(debug) {
1506 int totalcount=0;
1507
1508 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1509
1510 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1511 debuga(_("Log with mixed records format (squid and common log)\n"));
1512
1513 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1514 debuga(_("Common log format\n"));
1515
1516 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1517 debuga(_("Squid log format\n"));
1518
1519 if(ilf_count[ILF_Sarg]>0)
1520 debuga(_("Sarg log format\n"));
1521
1522 if(totalcount==0 && totregsg)
1523 debuga(_("Log with invalid format\n"));
1524 }
1525
1526 if(!totregsg){
1527 debuga(_("No records found\n"));
1528 debuga(_("End\n"));
1529 if(fp_denied) fclose(fp_denied);
1530 if(fp_authfail) fclose(fp_authfail);
1531 userinfo_free();
1532 if(userfile) free(userfile);
1533 close_usertab();
1534 exit(EXIT_SUCCESS);
1535 }
1536
1537 if (date[0]!='\0') {
1538 char date0[30], date1[30];
1539
1540 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1541 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1542 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1543 getperiod_fromrange(&period,dfrom,duntil);
1544 }
1545 if (getperiod_buildtext(&period)<0) {
1546 debuga(_("Failed to build the string representation of the date range\n"));
1547 exit(EXIT_FAILURE);
1548 }
1549
1550 if(debugz){
1551 debugaz("data",dia);
1552 debugaz("period",period.text);
1553 }
1554
1555 if(debug)
1556 debuga(_("Period: %s\n"),period.text);
1557
1558 // fclose(fp_ou);
1559 if(fp_denied)
1560 fclose(fp_denied);
1561 if(fp_authfail)
1562 fclose(fp_authfail);
1563
1564 if(fp_log != NULL) {
1565 fclose(fp_log);
1566 strcpy(end_hour,tbuf2);
1567 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1568 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1569 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1570 if (rename(arq_log,val4)) {
1571 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1572 } else {
1573 strcpy(arq_log,val4);
1574
1575 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1576 /*
1577 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1578 necessary around the command name, put them in the configuration file.
1579 */
1580 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1581 cstatus=system(val1);
1582 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1583 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1584 debuga(_("command: %s\n"),val1);
1585 exit(EXIT_FAILURE);
1586 }
1587 }
1588 }
1589 if(debug)
1590 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1591 }
1592
1593 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1594 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1595 cstatus=system(csort);
1596 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1597 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1598 debuga(_("sort command: %s\n"),csort);
1599 exit(EXIT_FAILURE);
1600 }
1601 unlink(denied_unsort);
1602 }
1603
1604 sort_users_log(tmp, debug);
1605
1606 if(DataFile[0] != '\0')
1607 data_file(tmp);
1608 else
1609 gerarel();
1610
1611 unlink(tmp2);
1612 if((ReportType & REPORT_TYPE_DENIED) != 0)
1613 unlink(denied_sort);
1614
1615 if(zip[0] != '\0' && strcmp(zip,"zcat") !=0) {
1616 recomp(arq, zip);
1617 }
1618 // else unlink(arq);
1619
1620 if(strcmp(tmp,"/tmp") != 0) {
1621 unlinkdir(tmp,0);
1622 }
1623
1624 userinfo_free();
1625 if(userfile)
1626 free(userfile);
1627 close_usertab();
1628
1629 if(debug)
1630 debuga(_("End\n"));
1631
1632 exit(EXIT_SUCCESS);
1633
1634 }
1635
1636
1637 static void getusers(const char *pwdfile, int debug)
1638 {
1639
1640 FILE *fp_usr;
1641 char buf[255];
1642 char *str;
1643 long int nreg=0;
1644
1645 if(debug)
1646 debuga(_("Loading password file from %s\n"),pwdfile);
1647
1648 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1649 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1650 exit(EXIT_FAILURE);
1651 }
1652
1653 fseek(fp_usr, 0, SEEK_END);
1654 nreg = ftell(fp_usr);
1655 if (nreg<0) {
1656 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1657 exit(EXIT_FAILURE);
1658 }
1659 nreg = nreg+5000;
1660 fseek(fp_usr, 0, SEEK_SET);
1661
1662 if((userfile=(char *) malloc(nreg))==NULL){
1663 debuga(_("malloc error (%ld)\n"),nreg);
1664 exit(EXIT_FAILURE);
1665 }
1666
1667 bzero(userfile,nreg);
1668 strcpy(userfile,":");
1669
1670 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1671 str=strchr(buf,':');
1672 if (!str) {
1673 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1674 exit(EXIT_FAILURE);
1675 }
1676 str[1]='\0';
1677 strcat(userfile,buf);
1678 }
1679
1680 fclose(fp_usr);
1681
1682 return;
1683 }