git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Ignore input log files too old to contain any entry to process
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #define REPORT_EVERY_X_LINES 5000 // number of log lines main() reads between two refreshes of the "Records in file ... reading: N%" progress line
31 #define MAX_OPEN_USER_FILES 10 // value main() stores in maxopenfiles before reading the logs; presumably caps the per-user files kept open at once - TODO confirm
32
33 struct userfilestruct // self-referencing record that ties one user to one stdio stream
34 {
35 struct userfilestruct *next; // following record of the same type; main() starts with first_user_file=NULL, so this looks like a singly linked chain
36 struct userinfostruct *user; // user record this entry refers to (type declared outside the visible part of the file)
37 FILE *file; // stream attached to that user - presumably the user's temporary output file, TODO confirm against the code that opens it
38 };
39
40 /*@null@*/static char *userfile=NULL; // file-local pointer that starts out NULL; the code that assigns and reads it is not in the visible part of the file
41
42 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; // initial value: the seven numbers 0-6 followed by 7 - presumably the list and its element count (numlist is declared elsewhere, confirm)
43 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
44 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; // initial value: the numbers 0-23 followed by 24, same layout as weekdays
45
46 static void getusers(const char *pwdfile, int debug); // forward declaration (definition not shown here); main() calls it with PasswdFile and the debug level only when access(PasswdFile, R_OK) succeeds
47
48 int main(int argc,char *argv[])
49 {
50 enum isa_col_id {
51 ISACOL_Ip,
52 ISACOL_UserName,
53 ISACOL_Date,
54 ISACOL_Time,
55 ISACOL_TimeTaken,
56 ISACOL_Bytes,
57 ISACOL_Uri,
58 ISACOL_Status,
59 ISACOL_Last //last entry of the list !
60 };
61 enum InputLogFormat {
62 ILF_Unknown,
63 ILF_Squid,
64 ILF_Common,
65 ILF_Sarg,
66 ILF_Isa,
67 ILF_Last //last entry of the list !
68 };
69
70 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
71
72 char sz_Download_Unsort[ 20000 ] ;
73 FILE * fp_Download_Unsort = NULL ;
74
75 extern int optind;
76 extern int optopt;
77 extern char *optarg;
78
79 char data[255];
80 char elap[255];
81 char ip[MAXLEN];
82 char tam[255];
83 char fun[MAXLEN];
84 char wuser[MAXLEN];
85 char smartfilter[MAXLEN];
86 char dia[128];
87 char mes[30];
88 char ano[30];
89 char hora[30];
90 char wtemp[MAXLEN];
91 char wtemp2[255];
92 char date[255];
93 char arq[255];
94 char arq_log[255];
95 char hm[15], hmf[15], hmr[15];
96 int chm=0;
97 char uagent[MAXLEN];
98 char hexclude[MAXLEN];
99 char csort[MAXLEN];
100 int cstatus;
101 char tbuf2[128];
102 char zip[20];
103 char *str;
104 char tmp2[MAXLEN];
105 char tmp3[MAXLEN];
106 char denied_unsort[MAXLEN];
107 char denied_sort[MAXLEN];
108 char authfail_unsort[MAXLEN];
109 char start_hour[128];
110 char end_hour[128];
111 char *linebuf;
112 char hostname[512];
113 char *url;
114 char *urly;
115 char user[MAX_USER_LEN];
116 enum InputLogFormat ilf;
117 int ilf_count[ILF_Last];
118 int ch;
119 int x;
120 int errflg=0;
121 int puser=0;
122 bool fhost=false;
123 bool dns=false;
124 bool fuser=false;
125 int idata=0;
126 int mindate=0;
127 int maxdate=0;
128 int iarq=0;
129 int isa_ncols=0,isa_cols[ISACOL_Last];
130 bool from_stdin;
131 int blen;
132 int maxopenfiles;
133 int nopen;
134 bool id_is_ip;
135 long totregsl=0;
136 long totregsg=0;
137 long totregsx=0;
138 bool totper=false;
139 long int max_elapsed=0;
140 long long int iyear, imonth, iday;
141 bool realt;
142 bool userip;
143 struct tm tt;
144 struct tm *t;
145 unsigned long recs1=0UL;
146 unsigned long recs2=0UL;
147 int OutputNonZero = REPORT_EVERY_X_LINES ;
148 bool download_flag=false;
149 char *download_url=NULL;
150 struct getwordstruct gwarea;
151 longline line;
152 time_t tnum;
153 struct stat logstat;
154 struct userinfostruct *uinfo;
155 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
156
157 #ifdef HAVE_LOCALE_H
158 setlocale(LC_TIME,"");
159 #endif
160
161 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
162 if (!setlocale (LC_ALL, "")) {
163 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
164 exit(EXIT_FAILURE);
165 }
166 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
167 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
168 exit(EXIT_FAILURE);
169 }
170 if (!textdomain (PACKAGE_NAME)) {
171 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
172 exit(EXIT_FAILURE);
173 }
174 #endif //ENABLE_NLS
175
176 BgImage[0]='\0';
177 LogoImage[0]='\0';
178 LogoText[0]='\0';
179 PasswdFile[0]='\0';
180 OutputEmail[0]='\0';
181 UserAgentLog[0]='\0';
182 ExcludeHosts[0]='\0';
183 ExcludeUsers[0]='\0';
184 ConfigFile[0]='\0';
185 code[0]='\0';
186 LastLog=0;
187 ReportType=0UL;
188 UserTabFile[0]='\0';
189 BlockIt[0]='\0';
190 ExternalCSSFile[0]='\0';
191 SquidGuardLogFormat[0]='\0';
192 SquidGuardLogAlternate[0]='\0';
193 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
194
195 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
196 strcpy(GraphDaysBytesBarColor,"orange");
197 strcpy(BgColor,"#ffffff");
198 strcpy(TxColor,"#000000");
199 strcpy(TxBgColor,"lavender");
200 strcpy(TiColor,"darkblue");
201 strcpy(Width,"80");
202 strcpy(Height,"45");
203 strcpy(LogoTextColor,"#000000");
204 strcpy(HeaderColor,"darkblue");
205 strcpy(HeaderBgColor,"#dddddd");
206 strcpy(LogoTextColor,"#006699");
207 strcpy(FontSize,"9px");
208 strcpy(TempDir,"/tmp");
209 strcpy(OutputDir,"/var/www/html/squid-reports");
210 Ip2Name=false;
211 strcpy(DateFormat,"u");
212 OverwriteReport=false;
213 RemoveTempFiles=true;
214 strcpy(ReplaceIndex,"index.html");
215 Index=INDEX_YES;
216 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
217 UseComma=0;
218 strcpy(MailUtility,"mailx");
219 TopSitesNum=100;
220 TopUsersNum=0;
221 UserIp=0;
222 strcpy(TopuserSortField,"BYTES");
223 strcpy(UserSortField,"BYTES");
224 strcpy(TopuserSortOrder,"reverse");
225 strcpy(UserSortOrder,"reverse");
226 strcpy(TopsitesSortField,"CONNECT");
227 strcpy(TopsitesSortType,"D");
228 LongUrl=0;
229 strcpy(FontFace,"Verdana,Tahoma,Arial");
230 strcpy(datetimeby,"elap");
231 strcpy(CharSet,"ISO-8859-1");
232 Privacy=0;
233 strcpy(PrivacyString,"***.***.***.***");
234 strcpy(PrivacyStringColor,"blue");
235 SuccessfulMsg=true;
236 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
237 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
238 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
239 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
240 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
241 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
242 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
243 strcpy(DataFileDelimiter,";");
244 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
245 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
246 ShowReadStatistics=true;
247 strcpy(IndexSortOrder,"D");
248 ShowSargInfo=true;
249 ShowSargLogo=true;
250 strcpy(ParsedOutputLog,"no");
251 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
252 strcpy(DisplayedValues,"abbreviation");
253 strcpy(HeaderFontSize,"9px");
254 strcpy(TitleFontSize,"11px");
255 strcpy(AuthUserTemplateFile,"sarg_htaccess");
256 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
257 Graphs=true;
258 #if defined(FONTDIR)
259 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
260 #else
261 GraphFont[0]='\0';
262 #endif
263 strcpy(Ulimit,"20000");
264 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
265 IndexTree=INDEX_TREE_FILE;
266 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
267 strcpy(RealtimeUnauthRec,"show");
268 SquidguardIgnoreDate=0;
269 DansguardianIgnoreDate=0;
270 DataFileUrl=DATAFILEURL_IP;
271 strcpy(MaxElapsed,"28800000");
272 BytesInSitesUsersReport=0;
273 UserAuthentication=0;
274 strcpy(LDAPHost,"127.0.0.1");
275 LDAPPort=389;
276 LDAPProtocolVersion=3;
277 LDAPBindDN[0]='\0';
278 LDAPBindPW[0]='\0';
279 LDAPBaseSearch[0]='\0';
280 strcpy(LDAPFilterSearch, "uid=%s");
281 strcpy(LDAPTargetAttr, "cn");
282
283 dia[0]='\0';
284 mes[0]='\0';
285 ano[0]='\0';
286 hora[0]='\0';
287 tmp[0]='\0';
288 tmp2[0]='\0';
289 tmp3[0]='\0';
290 wtemp[0]='\0';
291 wtemp2[0]='\0';
292 us[0]='\0';
293 date[0]='\0';
294 df[0]='\0';
295 uagent[0]='\0';
296 hexclude[0]='\0';
297 addr[0]='\0';
298 hm[0]='\0';
299 hmf[0]='\0';
300 site[0]='\0';
301 outdir[0]='\0';
302 elap[0]='\0';
303 email[0]='\0';
304 zip[0]='\0';
305 UserInvalidChar[0]='\0';
306 DataFile[0]='\0';
307 SquidGuardConf[0]='\0';
308 DansGuardianConf[0]='\0';
309 start_hour[0]='\0';
310 end_hour[0]='\0';
311
312 denied_count=0;
313 download_count=0;
314 authfail_count=0;
315 dansguardian_count=0;
316 squidguard_count=0;
317 useragent_count=0;
318 DeniedReportLimit=10;
319 AuthfailReportLimit=10;
320 DansGuardianReportLimit=10;
321 SquidGuardReportLimit=10;
322 DownloadReportLimit=50;
323 UserReportLimit=0;
324 debug=0;
325 debugz=0;
326 debugm=0;
327 iprel=false;
328 userip=false;
329 color1=0;
330 color2=0;
331 color3=0;
332 realt=false;
333 realtime_refresh=3;
334 realtime_access_log_lines=1000;
335 cost=0.01;
336 nocost=50000000;
337 ndownload=0;
338 squid24=false;
339 dfrom=0;
340 duntil=0;
341
342 bzero(IncludeUsers, sizeof(IncludeUsers));
343 bzero(ExcludeString, sizeof(ExcludeString));
344 first_user_file=NULL;
345 memset(&period,0,sizeof(period));
346
347 NAccessLog=0;
348 for(x=0; x<MAXLOGS; x++)
349 AccessLog[x][0]='\0';
350 AccessLogFromCmdLine=0;
351
352 strcpy(Title,_("Squid User Access Report"));
353
354 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
355 switch(ch)
356 {
357 case 'a':
358 strcpy(addr,optarg);
359 break;
360 case 'b':
361 strcpy(uagent,optarg);
362 break;
363 case 'c':
364 strcpy(hexclude,optarg);
365 break;
366 case 'd':
367 strncpy(date,optarg,sizeof(date)-1);
368 date[sizeof(date)-1]='\0';
369 date_from(date, &dfrom, &duntil);
370 break;
371 case 'e':
372 strcpy(email,optarg);
373 break;
374 case 'f':
375 strcpy(ConfigFile,optarg);
376 break;
377 case 'g':
378 strcpy(df,optarg);
379 break;
380 case 'h':
381 usage(argv[0]);
382 exit(EXIT_SUCCESS);
383 case 'i':
384 iprel=true;
385 break;
386 case 'l':
387 if (NAccessLog>=MAXLOGS) {
388 printf(_("SARG: Too many log files passed on command line with option -l.\n"));
389 exit(EXIT_FAILURE);
390 }
391 strcpy(AccessLog[NAccessLog],optarg);
392 NAccessLog++;
393 AccessLogFromCmdLine++;
394 break;
395 case 'L':
396 strcpy(SquidGuardLogAlternate,optarg);
397 break;
398 case 'm':
399 debugm++;
400 break;
401 case 'n':
402 dns=true;
403 break;
404 case 'o':
405 strcpy(outdir,optarg);
406 break;
407 case 'p':
408 userip=true;
409 break;
410 case 'r':
411 realt=true;
412 break;
413 case 's':
414 strcpy(site,optarg);
415 break;
416 case 't':
417 {
418 int h,m;
419
420 if(strstr(optarg,"-") == 0) {
421 strcpy(hm,optarg);
422 strcpy(hmf,optarg);
423 } else {
424 getword_start(&gwarea,optarg);
425 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
426 debuga(_("The time range passed on the command line with option -t is invalid\n"));
427 exit(EXIT_FAILURE);
428 }
429 }
430 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
431 debuga(_("Time period must be MM or MM:SS. Exit\n"));
432 exit(EXIT_FAILURE);
433 }
434 sprintf(hm,"%02d%02d",h,m);
435 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
436 debuga(_("Time period must be MM or MM:SS. Exit\n"));
437 exit(EXIT_FAILURE);
438 }
439 sprintf(hmf,"%02d%02d",h,m);
440 break;
441 }
442 case 'u':
443 strcpy(us,optarg);
444 break;
445 case 'v':
446 version();
447 break;
448 case 'w':
449 strcpy(tmp,optarg);
450 break;
451 case 'x':
452 debug++;
453 break;
454 case 'y':
455 langcode++;
456 break;
457 case 'z':
458 debugz++;
459 break;
460 case ':':
461 debuga(_("Option -%c require an argument\n"),optopt);
462 errflg++;
463 break;
464 case '?':
465 usage(argv[0]);
466 exit(EXIT_FAILURE);
467 }
468
469 }
470
471 if (errflg>0) {
472 usage(argv[0]);
473 exit(2);
474 }
475
476 if(debug) debuga(_("Init\n"));
477
478 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
479 if(access(ConfigFile, R_OK) != 0) {
480 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
481 exit(EXIT_FAILURE);
482 }
483
484 if(access(ConfigFile, R_OK) == 0)
485 getconf();
486
487 if(userip) UserIp=true;
488
489 if(dns) Ip2Name=true;
490
491 if(realt) {
492 realtime();
493 exit(EXIT_SUCCESS);
494 }
495
496 if(IndexTree == INDEX_TREE_FILE)
497 strcpy(ImageFile,"../images");
498 else
499 strcpy(ImageFile,"../../../images");
500
501 dataonly=0;
502 if(DataFile[0] != '\0')
503 dataonly++;
504
505 if(NAccessLog == 0) {
506 strcpy(AccessLog[0],"/var/log/squid/access.log");
507 NAccessLog++;
508 }
509
510 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
511 convlog(AccessLog[0], df, dfrom, duntil);
512 exit(EXIT_SUCCESS);
513 }
514
515 if(strcmp(site,"plit") == 0) {
516 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
517 exit(EXIT_SUCCESS);
518 }
519
520 load_excludecodes(ExcludeCodes);
521
522 if(access(PasswdFile, R_OK) == 0) {
523 getusers(PasswdFile,debug);
524 puser++;
525 }
526
527 if(hexclude[0] == '\0')
528 strcpy(hexclude,ExcludeHosts);
529 if(hexclude[0] != '\0') {
530 gethexclude(hexclude,debug);
531 fhost=true;
532 }
533
534 if(ReportType == 0) {
535 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
536 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
537 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
538 }
539
540 if(access(ExcludeUsers, R_OK) == 0) {
541 getuexclude(ExcludeUsers,debug);
542 fuser=true;
543 }
544
545 indexonly=0;
546 if(fuser) {
547 if(is_indexonly())
548 indexonly++;
549 }
550 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
551 if(Index == INDEX_ONLY) indexonly++;
552
553 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
554
555 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
556 strcat(outdir,"/");
557
558 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
559
560 if(tmp[0] == '\0') strcpy(tmp,TempDir);
561 else strcpy(TempDir,tmp);
562
563 if(df[0] == '\0') strcpy(df,DateFormat);
564 else strcpy(DateFormat,df);
565
566 if(df[0] == '\0') {
567 strcpy(df,"u");
568 strcpy(DateFormat,"u");
569 }
570 if (df[0]=='w')
571 IndexTree=INDEX_TREE_FILE;
572
573 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
574
575 strcpy(tmp2,tmp);
576
577 if(email[0] != '\0') {
578 snprintf(wtemp2,sizeof(wtemp2),"%s/sarg",tmp2);
579 my_mkdir(wtemp2);
580 strcat(tmp2,"/sarg");
581 strcpy(outdir,tmp2);
582 strcat(outdir,"/");
583 }
584
585 strcat(tmp2,"/sarg.log");
586
587 sprintf(tmp3,"%s/sarg",tmp);
588 if(access(tmp3, R_OK) == 0) {
589 unlinkdir(tmp3,1);
590 }
591 my_mkdir(tmp3);
592 strcpy(denied_unsort,tmp3);
593 strcpy(denied_sort,tmp3);
594 strcpy(authfail_unsort,tmp3);
595 strcat(denied_unsort,"/denied.log.unsort");
596 strcat(denied_sort,"/denied.log");
597 strcat(authfail_unsort,"/authfail.log.unsort");
598
599 if(debug) {
600 debuga(_("Parameters:\n"));
601 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
602 debuga(_(" Useragent log (-b) = %s\n"),uagent);
603 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
604 debuga(_(" Date from-until (-d) = %s\n"),date);
605 debuga(_(" Email address to send reports (-e) = %s\n"),email);
606 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
607 if(strcmp(df,"e") == 0)
608 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
609 if(strcmp(df,"u") == 0)
610 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
611 if(strcmp(df,"w") == 0)
612 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
613 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
614 for (iarq=0 ; iarq<NAccessLog ; iarq++)
615 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
616 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
617 debuga(_(" Output dir (-o) = %s\n"),outdir);
618 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
619 debuga(_(" Accessed site (-s) = %s\n"),site);
620 debuga(_(" Time (-t) = %s\n"),hm);
621 debuga(_(" User (-u) = %s\n"),us);
622 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
623 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
624 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
625 debuga("\n");
626 }
627
628 if(debugm) {
629 printf(_("Parameters:\n"));
630 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
631 printf(_(" Useragent log (-b) = %s\n"),uagent);
632 printf(_(" Exclude file (-c) = %s\n"),hexclude);
633 printf(_(" Date from-until (-d) = %s\n"),date);
634 printf(_(" Email address to send reports (-e) = %s\n"),email);
635 printf(_(" Config file (-f) = %s\n"),ConfigFile);
636 if(strcmp(df,"e") == 0)
637 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
638 if(strcmp(df,"u") == 0)
639 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
640 if(strcmp(df,"w") == 0)
641 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
642 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
643 for (iarq=0 ; iarq<NAccessLog ; iarq++)
644 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
645 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
646 printf(_(" Output dir (-o) = %s\n"),outdir);
647 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
648 printf(_(" Accessed site (-s) = %s\n"),site);
649 printf(_(" Time (-t) = %s\n"),hm);
650 printf(_(" User (-u) = %s\n"),us);
651 printf(_(" Temporary dir (-w) = %s\n"),tmp);
652 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
653 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
654 printf(_("sarg version: %s\n"),VERSION);
655 }
656
657 if(debug)
658 debuga(_("sarg version: %s\n"),VERSION);
659
660 maxopenfiles=MAX_OPEN_USER_FILES;
661 #ifdef HAVE_RLIM_T
662 if (Ulimit[0] != '\0') {
663 struct rlimit rl;
664 long l1, l2;
665 int rc=0;
666
667 #if defined(RLIMIT_NOFILE)
668 getrlimit (RLIMIT_NOFILE, &rl);
669 #elif defined(RLIMIT_OFILE)
670 getrlimit (RLIMIT_OFILE, &rl);
671 #else
672 #warning "No rlimit resource for the number of open files"
673 #endif
674 l1 = rl.rlim_cur;
675 l2 = rl.rlim_max;
676
677 rl.rlim_cur = atol(Ulimit);
678 rl.rlim_max = atol(Ulimit);
679 #if defined(RLIMIT_NOFILE)
680 rc=setrlimit (RLIMIT_NOFILE, &rl);
681 #elif defined(RLIMIT_OFILE)
682 rc=setrlimit (RLIMIT_OFILE, &rl);
683 #else
684 #warning "No rlimit resource for the number of open files"
685 #endif
686 if(rc == -1) {
687 debuga(_("setrlimit error - %s\n"),strerror(errno));
688 }
689
690 if(debug)
691 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
692 }
693 #endif
694
695 init_usertab(UserTabFile);
696
697 if ((line=longline_create())==NULL) {
698 debuga(_("Not enough memory to read a log file\n"));
699 exit(EXIT_FAILURE);
700 }
701
702 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/sarg/download.unsort", tmp);
703
704 if(DataFile[0]=='\0') {
705 if((ReportType & REPORT_TYPE_DENIED) != 0) {
706 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
707 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
708 exit(EXIT_FAILURE);
709 }
710 }
711
712 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
713 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
714 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
715 exit(EXIT_FAILURE);
716 }
717 }
718 }
719
720 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
721 strcpy(arq,AccessLog[iarq]);
722
723 strcpy(arqtt,arq);
724
725 if(strcmp(arq,"-")==0) {
726 if(debug)
727 debuga(_("Reading access log file: from stdin\n"));
728 fp_in=stdin;
729 from_stdin=true;
730 } else {
731 if (date[0]!='\0') {
732 if (stat(arq,&logstat)!=0) {
733 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
734 } else {
735 struct tm *logtime=localtime(&logstat.st_mtime);
736 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
737 debuga(_("Ignoring old log file %s\n"),arq);
738 continue;
739 }
740 }
741 }
742 decomp(arq,zip,tmp);
743 if(debug)
744 debuga(_("Reading access log file: %s\n"),arq);
745 if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
746 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
747 exit(EXIT_FAILURE);
748 }
749 from_stdin=false;
750 }
751 ilf=ILF_Unknown;
752 download_flag=false;
753 // pre-read the file only if we have to show stats
754 if(ShowReadStatistics && !from_stdin) {
755 size_t nread,i;
756 bool skipcr=false;
757 char tmp4[MAXLEN];
758
759 recs1=0UL;
760 recs2=0UL;
761
762 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
763 for (i=0 ; i<nread ; i++)
764 if (skipcr) {
765 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
766 skipcr=false;
767 }
768 } else {
769 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
770 skipcr=true;
771 recs1++;
772 }
773 }
774 }
775 rewind(fp_in);
776 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
777 putchar('\r');
778 fflush( stdout ) ;
779 }
780
781 longline_reset(line);
782
783 while ((linebuf=longline_read(fp_in,line))!=NULL) {
784 blen=strlen(linebuf);
785
786 if (ilf==ILF_Unknown) {
787 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
788 fixendofline(linebuf);
789 if (debug)
790 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
791 ilf=ILF_Isa;
792 ilf_count[ilf]++;
793 continue;
794 }
795
796 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
797 if (getperiod_fromsarglog(arqtt,&period)<0) {
798 debuga(_("The name of the file is invalid: %s\n"),arq);
799 exit(EXIT_FAILURE);
800 }
801 ilf=ILF_Sarg;
802 ilf_count[ilf]++;
803 continue;
804 }
805 }
806
807 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
808 if(access(ParsedOutputLog,R_OK) != 0) {
809 my_mkdir(ParsedOutputLog);
810 }
811 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
812 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
813 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
814 exit(EXIT_FAILURE);
815 }
816 fputs("*** SARG Log ***\n",fp_log);
817 }
818
819 recs2++;
820 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
821 double perc = recs2 * 100. / recs1 ;
822 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
823 putchar('\r');
824 fflush (stdout);
825 OutputNonZero = REPORT_EVERY_X_LINES ;
826 }
827 if(blen < 58) continue;
828 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
829 if(strstr(linebuf,"logfile turned over") != 0) continue;
830 if(linebuf[0] == ' ') continue;
831
832 // exclude_string
833 if(ExcludeString[0] != '\0') {
834 bool exstring=false;
835 getword_start(&gwarea,ExcludeString);
836 while(strchr(gwarea.current,':') != 0) {
837 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
838 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
839 exit(EXIT_FAILURE);
840 }
841 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
842 exstring=true;
843 break;
844 }
845 }
846 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
847 exstring=true;
848 if(exstring) continue;
849 }
850
851 totregsl++;
852 if(debugm)
853 printf("BUF=%s\n",linebuf);
854
855 t=NULL;
856 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
857 getword_start(&gwarea,linebuf);
858 if (getword(data,sizeof(data),&gwarea,' ')<0) {
859 debuga(_("Maybe you have a broken time in your access.log file\n"));
860 exit(EXIT_FAILURE);
861 }
862 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
863 strcpy(ip,data);
864 strcpy(elap,"0");
865 if(squid24) {
866 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
867 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
868 exit(EXIT_FAILURE);
869 }
870 } else {
871 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
872 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
873 exit(EXIT_FAILURE);
874 }
875 }
876 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
877 getword(fun,sizeof(fun),&gwarea,' ')<0) {
878 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
879 exit(EXIT_FAILURE);
880 }
881 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
882 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
883 exit(EXIT_FAILURE);
884 }
885 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
886 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
887 exit(EXIT_FAILURE);
888 }
889 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
890 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
891 exit(EXIT_FAILURE);
892 }
893 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
894 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
895 exit(EXIT_FAILURE);
896 }
897 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
898 if (getword(code,sizeof(code),&gwarea,' ')<0) {
899 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
900 exit(EXIT_FAILURE);
901 }
902 } else {
903 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
904 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
905 exit(EXIT_FAILURE);
906 }
907 }
908
909 if ((str = strchr(code, ':')) != NULL)
910 *str = '/';
911
912 if(strcmp(tam,"\0") == 0)
913 strcpy(tam,"0");
914
915 ilf=ILF_Common;
916 ilf_count[ilf]++;
917
918 getword_start(&gwarea,data+1);
919 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
920 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
921 exit(EXIT_FAILURE);
922 }
923 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
924 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
925 exit(EXIT_FAILURE);
926 }
927 getword_start(&gwarea,data);
928 if (getword_atoll(&iday,&gwarea,'/')<0){
929 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
930 exit(EXIT_FAILURE);
931 }
932 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
933 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
934 exit(EXIT_FAILURE);
935 }
936 if (getword_atoll(&iyear,&gwarea,'/')<0){
937 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
938 exit(EXIT_FAILURE);
939 }
940
941 imonth=month2num(mes)+1;
942 idata=builddia(iday,imonth,iyear);
943 computedate(iyear,imonth,iday,&tt);
944 t=&tt;
945 }
946
947 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
948 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
949 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
950 exit(EXIT_FAILURE);
951 }
952 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
953 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
954 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
955 exit(EXIT_FAILURE);
956 }
957 if(strlen(elap) < 1) continue;
958 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
959 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
960 exit(EXIT_FAILURE);
961 }
962 if (getword(code,sizeof(code),&gwarea,' ')<0){
963 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
964 exit(EXIT_FAILURE);
965 }
966 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
967 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
968 exit(EXIT_FAILURE);
969 }
970 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
971 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
972 exit(EXIT_FAILURE);
973 }
974 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
975 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
976 exit(EXIT_FAILURE);
977 }
978 if (getword(user,sizeof(user),&gwarea,' ')<0){
979 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
980 exit(EXIT_FAILURE);
981 }
982 ilf=ILF_Squid;
983 ilf_count[ilf]++;
984
985 tnum=atoi(data);
986 t=localtime(&tnum);
987 if (t == NULL) {
988 debuga(_("Cannot convert the timestamp from the squid log file\n"));
989 exit(EXIT_FAILURE);
990 }
991
992 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
993
994 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
995
996 if(strncmp(df,"u",1)==0)
997 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
998 else
999 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1000 sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1001 }
1002 }
1003 if (ilf==ILF_Sarg) {
1004 getword_start(&gwarea,linebuf);
1005 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1006 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1007 exit(EXIT_FAILURE);
1008 }
1009 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1010 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1011 exit(EXIT_FAILURE);
1012 }
1013 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1014 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1015 exit(EXIT_FAILURE);
1016 }
1017 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1018 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1019 exit(EXIT_FAILURE);
1020 }
1021 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1022 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1023 exit(EXIT_FAILURE);
1024 }
1025 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1026 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1027 exit(EXIT_FAILURE);
1028 }
1029 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1030 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1031 exit(EXIT_FAILURE);
1032 }
1033 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1034 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1035 exit(EXIT_FAILURE);
1036 }
1037 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1038 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1039 exit(EXIT_FAILURE);
1040 }
1041 getword_start(&gwarea,data);
1042 if(strcmp(df,"u") == 0) {
1043 if (getword_atoll(&imonth,&gwarea,'/')<0){
1044 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1045 exit(EXIT_FAILURE);
1046 }
1047 if (getword_atoll(&iday,&gwarea,'/')<0){
1048 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1049 exit(EXIT_FAILURE);
1050 }
1051 } else {
1052 if (getword_atoll(&iday,&gwarea,'/')<0){
1053 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1054 exit(EXIT_FAILURE);
1055 }
1056 if (getword_atoll(&imonth,&gwarea,'/')<0){
1057 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1058 exit(EXIT_FAILURE);
1059 }
1060 }
1061 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1062 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1063 exit(EXIT_FAILURE);
1064 }
1065 idata=builddia(iday,imonth,iyear);
1066 computedate(iyear,imonth,iday,&tt);
1067 t=&tt;
1068 }
1069 if (ilf==ILF_Isa) {
1070 if (linebuf[0] == '#') {
1071 int ncols,cols[ISACOL_Last];
1072
1073 fixendofline(linebuf);
1074 getword_start(&gwarea,linebuf);
1075 // remove the #Fields: column at the beginning of the line
1076 if (getword_skip(1000,&gwarea,' ')<0){
1077 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1078 exit(EXIT_FAILURE);
1079 }
1080 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1081 ncols=0;
1082 while(gwarea.current[0] != '\0') {
1083 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1084 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1085 exit(EXIT_FAILURE);
1086 }
1087 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1088 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1089 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1090 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1091 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1092 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1093 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1094 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1095 ncols++;
1096 }
1097 if (cols[ISACOL_Ip]>=0) {
1098 isa_ncols=ncols;
1099 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1100 isa_cols[ncols]=cols[ncols];
1101 }
1102 continue;
1103 }
1104 if (!isa_ncols) continue;
1105 getword_start(&gwarea,linebuf);
1106 for (x=0 ; x<isa_ncols ; x++) {
1107 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1108 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1109 exit(EXIT_FAILURE);
1110 }
1111 if (x==isa_cols[ISACOL_Ip]) {
1112 if (strlen(str)>=sizeof(ip)) {
1113 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1114 exit(EXIT_FAILURE);
1115 }
1116 strcpy(ip,str);
1117 } else if (x==isa_cols[ISACOL_UserName]) {
1118 if (strlen(str)>=sizeof(user)) {
1119 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1120 exit(EXIT_FAILURE);
1121 }
1122 strcpy(user,str);
1123 } else if (x==isa_cols[ISACOL_Date]) {
1124 if (strlen(str)>=sizeof(data)) {
1125 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1126 exit(EXIT_FAILURE);
1127 }
1128 strcpy(data,str);
1129 } else if (x==isa_cols[ISACOL_Time]) {
1130 if (strlen(str)>=sizeof(hora)) {
1131 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1132 exit(EXIT_FAILURE);
1133 }
1134 strcpy(hora,str);
1135 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1136 if (strlen(str)>=sizeof(elap)) {
1137 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1138 exit(EXIT_FAILURE);
1139 }
1140 strcpy(elap,str);
1141 } else if (x==isa_cols[ISACOL_Bytes]) {
1142 if (strlen(str)>=sizeof(tam)) {
1143 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1144 exit(EXIT_FAILURE);
1145 }
1146 strcpy(tam,str);
1147 } else if (x==isa_cols[ISACOL_Uri]) {
1148 url=str;
1149 } else if (x==isa_cols[ISACOL_Status]) {
1150 if (strlen(str)>=sizeof(code)) {
1151 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1152 exit(EXIT_FAILURE);
1153 }
1154 strcpy(code,str);
1155 }
1156 }
1157
1158 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1159 sprintf(val1,"DENIED/%s",code);
1160 strcpy(code,val1);
1161 }
1162 getword_start(&gwarea,data);
1163 if (getword_atoll(&iyear,&gwarea,'-')<0){
1164 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1165 exit(EXIT_FAILURE);
1166 }
1167 if (getword_atoll(&imonth,&gwarea,'-')<0){
1168 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1169 exit(EXIT_FAILURE);
1170 }
1171 if (getword_atoll(&iday,&gwarea,'\0')<0){
1172 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1173 exit(EXIT_FAILURE);
1174 }
1175
1176 idata=builddia(iday,imonth,iyear);
1177 computedate(iyear,imonth,iday,&tt);
1178 t=&tt;
1179 }
1180 if (t==NULL) {
1181 debuga(_("Unknown input log file format\n"));
1182 break;
1183 }
1184
1185 if(debugm)
1186 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1187
1188 if(date[0] != '\0'){
1189 if(idata < dfrom || idata > duntil) continue;
1190 }
1191
1192 // Record only hours usage which is required
1193 if (t) {
1194 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1195 sizeof( int ), compar ) == NULL )
1196 continue;
1197
1198 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1199 sizeof( int ), compar ) == NULL )
1200 continue;
1201 }
1202
1203
1204 if(strlen(user) > MAX_USER_LEN) {
1205 if (debugm) printf(_("User ID too long: %s\n"),user);
1206 totregsx++;
1207 continue;
1208 }
1209
1210 // include_users
1211 if(IncludeUsers[0] != '\0') {
1212 sprintf(val1,":%s:",user);
1213 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1214 continue;
1215 }
1216
1217 if(vercode(code)) {
1218 if (debugm) printf(_("Excluded code: %s\n"),code);
1219 totregsx++;
1220 continue;
1221 }
1222
1223 if(testvaliduserchar(user))
1224 continue;
1225
1226 #if 0
1227 if((str = strstr(user,"%20")) != NULL) {
1228 /*
1229 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1230 the side effect is to truncate the name at the first space and merge the reports
1231 of people whose name is identical up to the first space.
1232
1233 The old code used to truncate the user name at the first % if a %20 was
1234 found anywhere in the string. That means the string could be truncated
1235 at the wrong place if another % occurred before the %20. This new code should
1236 avoid that problem and only truncate at the space. There is no bug
1237 report indicating that anybody noticed this.
1238 */
1239 *str='\0';
1240 }
1241
1242 /*
1243 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1244 found in the user name.
1245 */
1246 while((str = strstr(user,"%5c")) != NULL) {
1247 *str='.';
1248 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1249 }
1250 #endif
1251
1252 urly=url;
1253
1254 if(ilf!=ILF_Sarg) {
1255 /*
1256 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1257 a downloaded file.
1258 */
1259 download_flag=is_download_suffix(url);
1260 if (download_flag) {
1261 download_url=url;
1262 download_count++;
1263 }
1264 } else
1265 download_flag=false;
1266
1267 // remove any protocol:// at the beginning of the URL
1268 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1269 int i;
1270
1271 str+=2;
1272 for (i=0 ; str[i] ; i++)
1273 url[i]=str[i];
1274 url[i]='\0';
1275 }
1276
1277 if(!LongUrl) {
1278 url_hostname(url,hostname,sizeof(hostname));
1279 url=hostname;
1280 }
1281
1282 if(url[0] == '\0') continue;
1283
1284 if(addr[0] != '\0'){
1285 if(strcmp(addr,ip)!=0) continue;
1286 }
1287 if(fhost) {
1288 if(!vhexclude(url)) {
1289 if (debugm) printf(_("Excluded site: %s\n"),url);
1290 totregsx++;
1291 continue;
1292 }
1293 }
1294
1295 if(hm[0] != '\0') {
1296 hmr[0]='\0';
1297 chm++;
1298 getword_start(&gwarea,hora);
1299 while(chm) {
1300 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1301 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1302 exit(EXIT_FAILURE);
1303 }
1304 strncat(hmr,warea,2);
1305 chm--;
1306 }
1307 strncat(hmr,gwarea.current,2);
1308
1309 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1310 }
1311
1312 if(site[0] != '\0'){
1313 if(strstr(url,site)==0) continue;
1314 }
1315
1316 if(UserIp) {
1317 strcpy(user,ip);
1318 id_is_ip=true;
1319 } else {
1320 id_is_ip=false;
1321 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1322 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1323 strcpy(user,ip);
1324 id_is_ip=true;
1325 }
1326 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1327 continue;
1328 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1329 strcpy(user,"everybody");
1330 } else {
1331 strlow(user);
1332 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1333 if((str = strchr(user,'_')) != 0) {
1334 strcpy(warea,str+1);
1335 strcpy(user,warea);
1336 }
1337 if((str = strchr(user,'+')) != 0) {
1338 strcpy(warea,str+1);
1339 strcpy(user,warea);
1340 }
1341 }
1342 }
1343 }
1344
1345 if(us[0] != '\0'){
1346 if(strcmp(user,us)!=0) continue;
1347 }
1348
1349 if(puser) {
1350 sprintf(wuser,":%s:",user);
1351 if(strstr(userfile, wuser) == 0)
1352 continue;
1353 }
1354
1355 if(fuser) {
1356 if(!vuexclude(user)) {
1357 if (debugm) printf(_("Excluded user: %s\n"),user);
1358 totregsx++;
1359 continue;
1360 }
1361 }
1362
1363 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1364 continue;
1365
1366 if(max_elapsed) {
1367 if(atol(elap)>max_elapsed) {
1368 elap[0]='0';
1369 elap[1]='\0';
1370 }
1371 }
1372
1373 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1374 fixendofline(str);
1375 sprintf(smartfilter,"\"%s\"",str+1);
1376 } else sprintf(smartfilter,"\"\"");
1377
1378 nopen=0;
1379 prev_ufile=NULL;
1380 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1381 prev_ufile=ufile;
1382 if (ufile->file) nopen++;
1383 }
1384 if (!ufile) {
1385 ufile=malloc(sizeof(*ufile));
1386 if (!ufile) {
1387 debuga(_("Not enough memory to store the user %s\n"),user);
1388 exit(EXIT_FAILURE);
1389 }
1390 memset(ufile,0,sizeof(*ufile));
1391 ufile->next=first_user_file;
1392 first_user_file=ufile;
1393 uinfo=userinfo_create(user);
1394 ufile->user=uinfo;
1395 uinfo->id_is_ip=id_is_ip;
1396 } else {
1397 if (prev_ufile) {
1398 prev_ufile->next=ufile->next;
1399 ufile->next=first_user_file;
1400 first_user_file=ufile;
1401 }
1402 }
1403
1404 if (ufile->file==NULL) {
1405 if (nopen>=maxopenfiles) {
1406 x=0;
1407 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1408 if (ufile1->file!=NULL) {
1409 if (x>=maxopenfiles) {
1410 fclose(ufile1->file);
1411 ufile1->file=NULL;
1412 }
1413 x++;
1414 }
1415 }
1416 }
1417 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1418 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1419 exit(EXIT_FAILURE);
1420 }
1421 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1422 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1423 exit (1);
1424 }
1425 }
1426
1427 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1428 if ( fp_Write_User )
1429 fclose( fp_Write_User ) ;
1430 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1431
1432 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1433 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1434 exit (1);
1435 }
1436 strcpy( sz_Last_User , user ) ;
1437 }*/
1438 fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter);
1439
1440 if(fp_log && ilf!=ILF_Sarg)
1441 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1442
1443 totregsg++;
1444
1445 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1446 ndownload = 1;
1447
1448 if ( ! fp_Download_Unsort ) {
1449 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1450 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1451 exit (1);
1452 }
1453 }
1454 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1455 }
1456
1457 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1458 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1459 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1460 denied_count++;
1461 }
1462 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1463 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1464 authfail_count++;
1465 }
1466 }
1467
1468 if (ilf!=ILF_Sarg) {
1469 if(!totper || idata<mindate){
1470 mindate=idata;
1471 memcpy(&period.start,t,sizeof(*t));
1472 strcpy(start_hour,tbuf2);
1473 }
1474 if (!totper || idata>maxdate) {
1475 maxdate=idata;
1476 memcpy(&period.end,t,sizeof(*t));
1477 }
1478 totper=true;
1479 }
1480
1481 if(debugm){
1482 printf("IP=\t%s\n",ip);
1483 printf("USER=\t%s\n",user);
1484 printf("ELAP=\t%s\n",elap);
1485 printf("DATE=\t%s\n",dia);
1486 printf("TIME=\t%s\n",hora);
1487 printf("FUNC=\t%s\n",fun);
1488 printf("URL=\t%s\n",url);
1489 printf("CODE=\t%s\n",code);
1490 printf("LEN=\t%s\n",tam);
1491 }
1492 }
1493 if (!from_stdin) {
1494 fclose(fp_in);
1495 if( ShowReadStatistics )
1496 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1497 }
1498 }
1499
1500 if (debug)
1501 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1502
1503 longline_destroy(&line);
1504 if ( fp_Download_Unsort )
1505 fclose (fp_Download_Unsort);
1506
1507 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1508 ufile1=ufile->next;
1509 if (ufile->file!=NULL) fclose(ufile->file);
1510 free(ufile);
1511 }
1512
1513 free_download();
1514 free_excludecodes();
1515 free_exclude();
1516
1517 if(debug) {
1518 int totalcount=0;
1519
1520 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1521
1522 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1523 debuga(_("Log with mixed records format (squid and common log)\n"));
1524
1525 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1526 debuga(_("Common log format\n"));
1527
1528 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1529 debuga(_("Squid log format\n"));
1530
1531 if(ilf_count[ILF_Sarg]>0)
1532 debuga(_("Sarg log format\n"));
1533
1534 if(totalcount==0 && totregsg)
1535 debuga(_("Log with invalid format\n"));
1536 }
1537
1538 if(!totregsg){
1539 debuga(_("No records found\n"));
1540 debuga(_("End\n"));
1541 if(fp_denied) fclose(fp_denied);
1542 if(fp_authfail) fclose(fp_authfail);
1543 userinfo_free();
1544 if(userfile) free(userfile);
1545 close_usertab();
1546 exit(EXIT_SUCCESS);
1547 }
1548
1549 if (date[0]!='\0') {
1550 char date0[30], date1[30];
1551
1552 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1553 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1554 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1555 getperiod_fromrange(&period,dfrom,duntil);
1556 }
1557 if (getperiod_buildtext(&period)<0) {
1558 debuga(_("Failed to build the string representation of the date range\n"));
1559 exit(EXIT_FAILURE);
1560 }
1561
1562 if(debugz){
1563 debugaz("data",dia);
1564 debugaz("period",period.text);
1565 }
1566
1567 if(debug)
1568 debuga(_("Period: %s\n"),period.text);
1569
1570 // fclose(fp_ou);
1571 if(fp_denied)
1572 fclose(fp_denied);
1573 if(fp_authfail)
1574 fclose(fp_authfail);
1575
1576 if(fp_log != NULL) {
1577 fclose(fp_log);
1578 strcpy(end_hour,tbuf2);
1579 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1580 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1581 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1582 if (rename(arq_log,val4)) {
1583 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1584 } else {
1585 strcpy(arq_log,val4);
1586
1587 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1588 /*
1589 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1590 necessary around the command name, put them in the configuration file.
1591 */
1592 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1593 cstatus=system(val1);
1594 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1595 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1596 debuga(_("command: %s\n"),val1);
1597 exit(EXIT_FAILURE);
1598 }
1599 }
1600 }
1601 if(debug)
1602 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1603 }
1604
1605 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1606 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1607 cstatus=system(csort);
1608 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1609 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1610 debuga(_("sort command: %s\n"),csort);
1611 exit(EXIT_FAILURE);
1612 }
1613 unlink(denied_unsort);
1614 }
1615
1616 sort_users_log(tmp, debug);
1617
1618 if(DataFile[0] != '\0')
1619 data_file(tmp);
1620 else
1621 gerarel();
1622
1623 unlink(tmp2);
1624 if((ReportType & REPORT_TYPE_DENIED) != 0)
1625 unlink(denied_sort);
1626
1627 if(zip[0] != '\0' && strcmp(zip,"zcat") !=0) {
1628 recomp(arq, zip);
1629 }
1630 // else unlink(arq);
1631
1632 if(strcmp(tmp,"/tmp") != 0) {
1633 unlinkdir(tmp,0);
1634 }
1635
1636 userinfo_free();
1637 if(userfile)
1638 free(userfile);
1639 close_usertab();
1640
1641 if(debug)
1642 debuga(_("End\n"));
1643
1644 exit(EXIT_SUCCESS);
1645
1646 }
1647
1648
1649 static void getusers(const char *pwdfile, int debug)
1650 {
1651
1652 FILE *fp_usr;
1653 char buf[255];
1654 char *str;
1655 long int nreg=0;
1656
1657 if(debug)
1658 debuga(_("Loading password file from %s\n"),pwdfile);
1659
1660 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1661 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1662 exit(EXIT_FAILURE);
1663 }
1664
1665 fseek(fp_usr, 0, SEEK_END);
1666 nreg = ftell(fp_usr);
1667 if (nreg<0) {
1668 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1669 exit(EXIT_FAILURE);
1670 }
1671 nreg = nreg+5000;
1672 fseek(fp_usr, 0, SEEK_SET);
1673
1674 if((userfile=(char *) malloc(nreg))==NULL){
1675 debuga(_("malloc error (%ld)\n"),nreg);
1676 exit(EXIT_FAILURE);
1677 }
1678
1679 bzero(userfile,nreg);
1680 strcpy(userfile,":");
1681
1682 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1683 str=strchr(buf,':');
1684 if (!str) {
1685 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1686 exit(EXIT_FAILURE);
1687 }
1688 str[1]='\0';
1689 strcat(userfile,buf);
1690 }
1691
1692 fclose(fp_usr);
1693
1694 return;
1695 }