git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Optimize time range filtering
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef HAVE_GETOPT_H
31 #include <getopt.h>
32 #endif
33
34 #define REPORT_EVERY_X_LINES 5000
35 #define MAX_OPEN_USER_FILES 10
36
/*
 * Pairs a user record with a stdio stream. Nodes are chained through the
 * "next" member; main() declares first_user_file (initialised to NULL)
 * along with a few working pointers of this type.
 * NOTE(review): presumably one node per user whose temporary file is
 * currently open while the access log is read -- confirm against the rest
 * of main().
 */
37 struct userfilestruct
38 {
39 struct userfilestruct *next; /* link to another node of the same type */
40 struct userinfostruct *user; /* user record this node belongs to */
41 FILE *file; /* stdio stream associated with that user */
42 };
43
/*
 * File-private string pointer, unset (NULL) at start-up. The annotation in
 * front of the declaration is a Splint marker stating that the pointer may
 * be NULL.
 */
44 /*@null@*/static char *userfile=NULL;
45
/*
 * Default selections, visible to other translation units (not static):
 * weekdays lists every value from 0 to 6 and hours every value from 0 to 23.
 * NOTE(review): the trailing 7 / 24 is presumably the number of valid
 * entries in the list -- confirm against the numlist declaration.
 */
46 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
47 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
48 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
49
/*
 * Forward declaration of a file-private helper defined elsewhere in this
 * file. main() calls it with PasswdFile and the debug level once
 * access(PasswdFile, R_OK) has succeeded.
 */
50 static void getusers(const char *pwdfile, int debug);
51
52 int main(int argc,char *argv[])
53 {
54 enum isa_col_id {
55 ISACOL_Ip,
56 ISACOL_UserName,
57 ISACOL_Date,
58 ISACOL_Time,
59 ISACOL_TimeTaken,
60 ISACOL_Bytes,
61 ISACOL_Uri,
62 ISACOL_Status,
63 ISACOL_Last //last entry of the list !
64 };
65 enum InputLogFormat {
66 ILF_Unknown,
67 ILF_Squid,
68 ILF_Common,
69 ILF_Sarg,
70 ILF_Isa,
71 ILF_Last //last entry of the list !
72 };
73
74 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
75
76 char sz_Download_Unsort[ 20000 ] ;
77 FILE * fp_Download_Unsort = NULL ;
78
79 extern int optind;
80 extern int optopt;
81 extern char *optarg;
82
83 char data[255];
84 char elap[255];
85 char ip[MAXLEN];
86 char tam[255];
87 char fun[MAXLEN];
88 char wuser[MAXLEN];
89 char smartfilter[MAXLEN];
90 char dia[128];
91 char mes[30];
92 char ano[30];
93 char hora[30];
94 char wtemp[MAXLEN];
95 char date[255];
96 char arq[255];
97 char arq_log[255];
98 int hm, hmf, hmr;
99 char hm_str[15];
100 char uagent[MAXLEN];
101 char hexclude[MAXLEN];
102 char csort[MAXLEN];
103 int cstatus;
104 char tbuf2[128];
105 char zip[20];
106 char *str;
107 char tmp3[MAXLEN];
108 char denied_unsort[MAXLEN];
109 char denied_sort[MAXLEN];
110 char authfail_unsort[MAXLEN];
111 char start_hour[128];
112 char end_hour[128];
113 char *linebuf;
114 char hostname[512];
115 char *url;
116 char *urly;
117 char user[MAX_USER_LEN];
118 enum InputLogFormat ilf;
119 int ilf_count[ILF_Last];
120 int ch;
121 int x;
122 int errflg=0;
123 int puser=0;
124 bool fhost=false;
125 bool dns=false;
126 bool fuser=false;
127 int idata=0;
128 int mindate=0;
129 int maxdate=0;
130 int iarq=0;
131 int isa_ncols=0,isa_cols[ISACOL_Last];
132 bool from_stdin;
133 bool from_pipe;
134 int blen;
135 int maxopenfiles;
136 int nopen;
137 bool id_is_ip;
138 long totregsl=0;
139 long totregsg=0;
140 long totregsx=0;
141 bool totper=false;
142 long int max_elapsed=0;
143 long long int iyear, imonth, iday;
144 bool realt;
145 bool userip;
146 struct tm tt;
147 struct tm *t;
148 unsigned long recs1=0UL;
149 unsigned long recs2=0UL;
150 int OutputNonZero = REPORT_EVERY_X_LINES ;
151 bool download_flag=false;
152 char *download_url=NULL;
153 struct getwordstruct gwarea;
154 longline line;
155 time_t tnum;
156 struct stat logstat;
157 struct userinfostruct *uinfo;
158 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
159 static int split=0;
160 static int convert=0;
161 static int output_css=0;
162 int option_index;
163 static struct option long_options[]=
164 {
165 {"convert",no_argument,&convert,1},
166 {"css",no_argument,&output_css,1},
167 {"split",no_argument,&split,1},
168 {0,0,0,0}
169 };
170
171 #ifdef HAVE_LOCALE_H
172 setlocale(LC_TIME,"");
173 #endif
174
175 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
176 if (!setlocale (LC_ALL, "")) {
177 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
178 exit(EXIT_FAILURE);
179 }
180 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
181 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
182 exit(EXIT_FAILURE);
183 }
184 if (!textdomain (PACKAGE_NAME)) {
185 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
186 exit(EXIT_FAILURE);
187 }
188 #endif //ENABLE_NLS
189
190 BgImage[0]='\0';
191 LogoImage[0]='\0';
192 LogoText[0]='\0';
193 PasswdFile[0]='\0';
194 OutputEmail[0]='\0';
195 UserAgentLog[0]='\0';
196 ExcludeHosts[0]='\0';
197 ExcludeUsers[0]='\0';
198 ConfigFile[0]='\0';
199 code[0]='\0';
200 LastLog=0;
201 ReportType=0UL;
202 UserTabFile[0]='\0';
203 BlockIt[0]='\0';
204 ExternalCSSFile[0]='\0';
205 RedirectorLogFormat[0]='\0';
206 NRedirectorLogs=0;
207 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
208
209 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
210 strcpy(GraphDaysBytesBarColor,"orange");
211 strcpy(BgColor,"#ffffff");
212 strcpy(TxColor,"#000000");
213 strcpy(TxBgColor,"lavender");
214 strcpy(TiColor,"darkblue");
215 strcpy(Width,"80");
216 strcpy(Height,"45");
217 strcpy(LogoTextColor,"#000000");
218 strcpy(HeaderColor,"darkblue");
219 strcpy(HeaderBgColor,"#dddddd");
220 strcpy(LogoTextColor,"#006699");
221 strcpy(FontSize,"9px");
222 strcpy(TempDir,"/tmp");
223 strcpy(OutputDir,"/var/www/html/squid-reports");
224 Ip2Name=false;
225 strcpy(DateFormat,"u");
226 OverwriteReport=false;
227 RemoveTempFiles=true;
228 strcpy(ReplaceIndex,"index.html");
229 Index=INDEX_YES;
230 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
231 UseComma=0;
232 strcpy(MailUtility,"mailx");
233 TopSitesNum=100;
234 TopUsersNum=0;
235 UserIp=0;
236 TopuserSort=TOPUSER_SORT_BYTES | TOPUSER_SORT_REVERSE;
237 UserSort=USER_SORT_BYTES | USER_SORT_REVERSE;
238 TopsitesSort=TOPSITE_SORT_CONNECT | TOPSITE_SORT_REVERSE;
239 LongUrl=0;
240 strcpy(FontFace,"Verdana,Tahoma,Arial");
241 datetimeby=DATETIME_BYTE;
242 strcpy(CharSet,"ISO-8859-1");
243 Privacy=0;
244 strcpy(PrivacyString,"***.***.***.***");
245 strcpy(PrivacyStringColor,"blue");
246 SuccessfulMsg=true;
247 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
248 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
249 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
250 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
251 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
252 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
253 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
254 strcpy(DataFileDelimiter,";");
255 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
256 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
257 ShowReadStatistics=true;
258 strcpy(IndexSortOrder,"D");
259 ShowSargInfo=true;
260 ShowSargLogo=true;
261 strcpy(ParsedOutputLog,"no");
262 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
263 DisplayedValues=DISPLAY_ABBREV;
264 strcpy(HeaderFontSize,"9px");
265 strcpy(TitleFontSize,"11px");
266 strcpy(AuthUserTemplateFile,"sarg_htaccess");
267 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
268 Graphs=true;
269 #if defined(FONTDIR)
270 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
271 #else
272 GraphFont[0]='\0';
273 #endif
274 strcpy(Ulimit,"20000");
275 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
276 IndexTree=INDEX_TREE_FILE;
277 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
278 RealtimeUnauthRec=REALTIME_UNAUTH_REC_SHOW;
279 RedirectorFilterOutDate=true;
280 DansguardianFilterOutDate=true;
281 DataFileUrl=DATAFILEURL_IP;
282 strcpy(MaxElapsed,"28800000");
283 BytesInSitesUsersReport=0;
284 UserAuthentication=0;
285 strcpy(LDAPHost,"127.0.0.1");
286 LDAPPort=389;
287 LDAPProtocolVersion=3;
288 LDAPBindDN[0]='\0';
289 LDAPBindPW[0]='\0';
290 LDAPBaseSearch[0]='\0';
291 strcpy(LDAPFilterSearch, "(uid=%s)");
292 strcpy(LDAPTargetAttr, "cn");
293
294 dia[0]='\0';
295 mes[0]='\0';
296 ano[0]='\0';
297 hora[0]='\0';
298 tmp[0]='\0';
299 tmp3[0]='\0';
300 wtemp[0]='\0';
301 us[0]='\0';
302 date[0]='\0';
303 df[0]='\0';
304 uagent[0]='\0';
305 hexclude[0]='\0';
306 addr[0]='\0';
307 hm=-1;
308 hmf=-1;
309 site[0]='\0';
310 outdir[0]='\0';
311 elap[0]='\0';
312 email[0]='\0';
313 zip[0]='\0';
314 UserInvalidChar[0]='\0';
315 DataFile[0]='\0';
316 SquidGuardConf[0]='\0';
317 DansGuardianConf[0]='\0';
318 start_hour[0]='\0';
319 end_hour[0]='\0';
320
321 denied_count=0;
322 download_count=0;
323 authfail_count=0;
324 dansguardian_count=0;
325 squidguard_count=0;
326 useragent_count=0;
327 DeniedReportLimit=10;
328 AuthfailReportLimit=10;
329 DansGuardianReportLimit=10;
330 SquidGuardReportLimit=10;
331 DownloadReportLimit=50;
332 UserReportLimit=0;
333 debug=0;
334 debugz=0;
335 debugm=0;
336 iprel=false;
337 userip=false;
338 realt=false;
339 realtime_refresh=3;
340 realtime_access_log_lines=1000;
341 cost=0.01;
342 nocost=50000000;
343 ndownload=0;
344 squid24=false;
345 dfrom=0;
346 duntil=0;
347
348 bzero(IncludeUsers, sizeof(IncludeUsers));
349 bzero(ExcludeString, sizeof(ExcludeString));
350 first_user_file=NULL;
351 memset(&period,0,sizeof(period));
352
353 NAccessLog=0;
354 for(x=0; x<MAXLOGS; x++)
355 AccessLog[x][0]='\0';
356 AccessLogFromCmdLine=0;
357 RedirectorLogFromCmdLine=0;
358
359 strcpy(Title,_("Squid User Access Report"));
360
361 while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
362 switch(ch)
363 {
364 case 0:
365 break;
366 case 'a':
367 strcpy(addr,optarg);
368 break;
369 case 'b':
370 strcpy(uagent,optarg);
371 break;
372 case 'c':
373 strcpy(hexclude,optarg);
374 break;
375 case 'd':
376 strncpy(date,optarg,sizeof(date)-1);
377 date[sizeof(date)-1]='\0';
378 date_from(date, &dfrom, &duntil);
379 break;
380 case 'e':
381 strcpy(email,optarg);
382 break;
383 case 'f':
384 strcpy(ConfigFile,optarg);
385 break;
386 case 'g':
387 strcpy(df,optarg);
388 break;
389 case 'h':
390 usage(argv[0]);
391 exit(EXIT_SUCCESS);
392 case 'i':
393 iprel=true;
394 break;
395 case 'l':
396 if (NAccessLog>=MAXLOGS) {
397 debuga(_("Too many log files passed on command line with option -l.\n"));
398 exit(EXIT_FAILURE);
399 }
400 if (strlen(optarg)>=MAX_LOG_FILELEN) {
401 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
402 exit(EXIT_FAILURE);
403 }
404 strcpy(AccessLog[NAccessLog],optarg);
405 NAccessLog++;
406 AccessLogFromCmdLine++;
407 break;
408 case 'L':
409 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
410 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
411 exit(EXIT_FAILURE);
412 }
413 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
414 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
415 exit(EXIT_FAILURE);
416 }
417 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
418 NRedirectorLogs++;
419 RedirectorLogFromCmdLine++;
420 break;
421 case 'm':
422 debugm++;
423 break;
424 case 'n':
425 dns=true;
426 break;
427 case 'o':
428 strcpy(outdir,optarg);
429 break;
430 case 'p':
431 userip=true;
432 break;
433 case 'r':
434 realt=true;
435 break;
436 case 's':
437 strcpy(site,optarg);
438 break;
439 case 't':
440 {
441 int h1,m1,h2,m2;
442
443 if(strstr(optarg,"-") == 0) {
444 if(sscanf(optarg,"%d:%d",&h1,&m1)!=2) {
445 debuga(_("Time period passed on the command line with option -t must be MM:SS\n"));
446 exit(EXIT_FAILURE);
447 }
448 hm=h1*100+m1;
449 hmf=hm;
450 snprintf(hm_str,sizeof(hm_str),"%02d:%02d",h1,m1);
451 } else {
452 if(sscanf(optarg,"%d:%d-%d:%d",&h1,&m1,&h2,&m2)!=4) {
453 debuga(_("Time range passed on the command line with option -t must be MM:SS-MM:SS\n"));
454 exit(EXIT_FAILURE);
455 }
456 hm=h1*100+m1;
457 hmf=h2*100+m2;
458 snprintf(hm_str,sizeof(hm_str),"%02d:%02d-%02d:%02d",h1,m1,h2,m2);
459 }
460 break;
461 }
462 case 'u':
463 strcpy(us,optarg);
464 break;
465 case 'v':
466 version();
467 break;
468 case 'w':
469 strcpy(tmp,optarg);
470 break;
471 case 'x':
472 debug++;
473 break;
474 case 'y':
475 langcode++;
476 break;
477 case 'z':
478 debugz++;
479 break;
480 /*case ':':
481 debuga(_("Option -%c require an argument\n"),optopt);
482 errflg++;
483 break;*/
484 case '?':
485 usage(argv[0]);
486 exit(EXIT_FAILURE);
487 default:
488 abort();
489 }
490 }
491
492 if (errflg>0) {
493 usage(argv[0]);
494 exit(2);
495 }
496
497 if (optind<argc) {
498 for (iarq=optind ; iarq<argc ; iarq++) {
499 if (NAccessLog>=MAXLOGS) {
500 debuga(_("Too many log files passed on command line.\n"));
501 exit(EXIT_FAILURE);
502 }
503 if (strlen(argv[iarq])>=MAX_LOG_FILELEN) {
504 debuga(_("Log file name too long passed on command line: %s\n"),argv[iarq]);
505 exit(EXIT_FAILURE);
506 }
507 strcpy(AccessLog[NAccessLog],argv[iarq]);
508 NAccessLog++;
509 AccessLogFromCmdLine++;
510 }
511 }
512
513 if(debug) debuga(_("Init\n"));
514
515 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
516 if(access(ConfigFile, R_OK) != 0) {
517 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
518 exit(EXIT_FAILURE);
519 }
520
521 if(access(ConfigFile, R_OK) == 0)
522 getconf();
523
524 if(userip) UserIp=true;
525
526 if(dns) Ip2Name=true;
527
528 if(realt) {
529 realtime();
530 exit(EXIT_SUCCESS);
531 }
532
533 if(IndexTree == INDEX_TREE_FILE)
534 strcpy(ImageFile,"../images");
535 else
536 strcpy(ImageFile,"../../../images");
537
538 dataonly=0;
539 if(DataFile[0] != '\0')
540 dataonly++;
541
542 if(NAccessLog == 0) {
543 strcpy(AccessLog[0],"/var/log/squid/access.log");
544 NAccessLog++;
545 }
546
547 if(output_css) {
548 css_content(stdout);
549 exit(EXIT_SUCCESS);
550 }
551 if(split) {
552 for (iarq=0 ; iarq<NAccessLog ; iarq++)
553 splitlog(AccessLog[iarq], df, dfrom, duntil, convert);
554 exit(EXIT_SUCCESS);
555 }
556 if(convert) {
557 for (iarq=0 ; iarq<NAccessLog ; iarq++)
558 convlog(AccessLog[iarq], df, dfrom, duntil);
559 exit(EXIT_SUCCESS);
560 }
561
562 load_excludecodes(ExcludeCodes);
563
564 if(access(PasswdFile, R_OK) == 0) {
565 getusers(PasswdFile,debug);
566 puser++;
567 }
568
569 if(hexclude[0] == '\0')
570 strcpy(hexclude,ExcludeHosts);
571 if(hexclude[0] != '\0') {
572 gethexclude(hexclude,debug);
573 fhost=true;
574 }
575
576 if(ReportType == 0) {
577 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
578 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
579 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
580 }
581
582 if(access(ExcludeUsers, R_OK) == 0) {
583 getuexclude(ExcludeUsers,debug);
584 fuser=true;
585 }
586
587 indexonly=0;
588 if(fuser) {
589 if(is_indexonly())
590 indexonly++;
591 }
592 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
593 if(Index == INDEX_ONLY) indexonly++;
594
595 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
596
597 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
598 strcat(outdir,"/");
599
600 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
601
602 if(tmp[0] == '\0') strcpy(tmp,TempDir);
603 else strcpy(TempDir,tmp);
604 /*
605 For historical reasons, the temporary directory is the subdirectory "sarg" of the path
606 provided by the user.
607 */
608 strcat(tmp,"/sarg");
609
610 if (tmp[0]!='\0' && strncmp(outdir,tmp,strlen(tmp))==0) {
611 debuga(_("The output directory \"%s\" must be outside of the temporary directory \"%s\"\n"),outdir,tmp);
612 exit(EXIT_FAILURE);
613 }
614
615 if(df[0] == '\0') strcpy(df,DateFormat);
616 else strcpy(DateFormat,df);
617
618 if(df[0] == '\0') {
619 strcpy(df,"u");
620 strcpy(DateFormat,"u");
621 }
622 if (df[0]=='w')
623 IndexTree=INDEX_TREE_FILE;
624
625 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
626
627 if(email[0] != '\0') {
628 my_mkdir(tmp);
629 strcpy(outdir,tmp);
630 strcat(outdir,"/");
631 }
632
633 if(access(tmp, R_OK) == 0) {
634 unlinkdir(tmp,1);
635 }
636 my_mkdir(tmp);
637 snprintf(denied_unsort,sizeof(denied_unsort),"%s/denied.log.unsort",tmp);
638 snprintf(denied_sort,sizeof(denied_sort),"%s/denied.log",tmp);
639 snprintf(authfail_unsort,sizeof(authfail_unsort),"%s/authfail.log.unsort",tmp);
640
641 if(debug) {
642 debuga(_("Parameters:\n"));
643 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
644 debuga(_(" Useragent log (-b) = %s\n"),uagent);
645 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
646 debuga(_(" Date from-until (-d) = %s\n"),date);
647 debuga(_(" Email address to send reports (-e) = %s\n"),email);
648 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
649 if(strcmp(df,"e") == 0)
650 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
651 if(strcmp(df,"u") == 0)
652 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
653 if(strcmp(df,"w") == 0)
654 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
655 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
656 for (iarq=0 ; iarq<NAccessLog ; iarq++)
657 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
658 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
659 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
660 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
661 debuga(_(" Output dir (-o) = %s\n"),outdir);
662 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
663 debuga(_(" Accessed site (-s) = %s\n"),site);
664 debuga(_(" Time (-t) = %s\n"),hm_str);
665 debuga(_(" User (-u) = %s\n"),us);
666 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
667 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
668 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
669 debuga("\n");
670 }
671
672 if(debugm) {
673 printf(_("Parameters:\n"));
674 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
675 printf(_(" Useragent log (-b) = %s\n"),uagent);
676 printf(_(" Exclude file (-c) = %s\n"),hexclude);
677 printf(_(" Date from-until (-d) = %s\n"),date);
678 printf(_(" Email address to send reports (-e) = %s\n"),email);
679 printf(_(" Config file (-f) = %s\n"),ConfigFile);
680 if(strcmp(df,"e") == 0)
681 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
682 if(strcmp(df,"u") == 0)
683 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
684 if(strcmp(df,"w") == 0)
685 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
686 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
687 for (iarq=0 ; iarq<NAccessLog ; iarq++)
688 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
689 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
690 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
691 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
692 printf(_(" Output dir (-o) = %s\n"),outdir);
693 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
694 printf(_(" Accessed site (-s) = %s\n"),site);
695 printf(_(" Time (-t) = %s\n"),hm_str);
696 printf(_(" User (-u) = %s\n"),us);
697 printf(_(" Temporary dir (-w) = %s\n"),tmp);
698 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
699 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
700 printf(_("sarg version: %s\n"),VERSION);
701 }
702
703 if(debug)
704 debuga(_("sarg version: %s\n"),VERSION);
705
706 #ifdef ENABLE_DOUBLE_CHECK_DATA
707 debuga(_("Sarg compiled to report warnings if the output is inconsistent\n"));
708 #endif
709
710 maxopenfiles=MAX_OPEN_USER_FILES;
711 #ifdef HAVE_RLIM_T
712 if (Ulimit[0] != '\0') {
713 struct rlimit rl;
714 long l1, l2;
715 int rc=0;
716
717 #if defined(RLIMIT_NOFILE)
718 getrlimit (RLIMIT_NOFILE, &rl);
719 #elif defined(RLIMIT_OFILE)
720 getrlimit (RLIMIT_OFILE, &rl);
721 #else
722 #warning "No rlimit resource for the number of open files"
723 #endif
724 l1 = rl.rlim_cur;
725 l2 = rl.rlim_max;
726
727 rl.rlim_cur = atol(Ulimit);
728 rl.rlim_max = atol(Ulimit);
729 #if defined(RLIMIT_NOFILE)
730 rc=setrlimit (RLIMIT_NOFILE, &rl);
731 #elif defined(RLIMIT_OFILE)
732 rc=setrlimit (RLIMIT_OFILE, &rl);
733 #else
734 #warning "No rlimit resource for the number of open files"
735 #endif
736 if(rc == -1) {
737 debuga(_("setrlimit error - %s\n"),strerror(errno));
738 }
739
740 if(debug)
741 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
742 }
743 #endif
744
745 init_usertab(UserTabFile);
746
747 if ((line=longline_create())==NULL) {
748 debuga(_("Not enough memory to read a log file\n"));
749 exit(EXIT_FAILURE);
750 }
751
752 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.unsort", tmp);
753
754 if(DataFile[0]=='\0') {
755 if((ReportType & REPORT_TYPE_DENIED) != 0) {
756 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
757 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
758 exit(EXIT_FAILURE);
759 }
760 }
761
762 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
763 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
764 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
765 exit(EXIT_FAILURE);
766 }
767 }
768 }
769
770 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
771 strcpy(arq,AccessLog[iarq]);
772
773 strcpy(arqtt,arq);
774
775 if(strcmp(arq,"-")==0) {
776 if(debug)
777 debuga(_("Reading access log file: from stdin\n"));
778 fp_in=stdin;
779 from_stdin=true;
780 } else {
781 if (date[0]!='\0') {
782 if (stat(arq,&logstat)!=0) {
783 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
784 } else {
785 struct tm *logtime=localtime(&logstat.st_mtime);
786 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
787 debuga(_("Ignoring old log file %s\n"),arq);
788 continue;
789 }
790 }
791 }
792 fp_in=decomp(arq,&from_pipe);
793 if(fp_in==NULL) {
794 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
795 exit(EXIT_FAILURE);
796 }
797 if(debug) debuga(_("Reading access log file: %s\n"),arq);
798 from_stdin=false;
799 }
800 ilf=ILF_Unknown;
801 download_flag=false;
802 // pre-read the file only if we have to show stats
803 if(ShowReadStatistics && !from_stdin && !from_pipe) {
804 size_t nread,i;
805 bool skipcr=false;
806 char tmp4[MAXLEN];
807
808 recs1=0UL;
809 recs2=0UL;
810
811 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
812 for (i=0 ; i<nread ; i++)
813 if (skipcr) {
814 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
815 skipcr=false;
816 }
817 } else {
818 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
819 skipcr=true;
820 recs1++;
821 }
822 }
823 }
824 rewind(fp_in);
825 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
826 putchar('\r');
827 fflush( stdout ) ;
828 }
829
830 longline_reset(line);
831
832 while ((linebuf=longline_read(fp_in,line))!=NULL) {
833 blen=strlen(linebuf);
834
835 if (ilf==ILF_Unknown) {
836 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
837 fixendofline(linebuf);
838 if (debug)
839 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
840 ilf=ILF_Isa;
841 ilf_count[ilf]++;
842 continue;
843 }
844
845 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
846 if (getperiod_fromsarglog(arqtt,&period)<0) {
847 debuga(_("The name of the file is invalid: %s\n"),arq);
848 exit(EXIT_FAILURE);
849 }
850 ilf=ILF_Sarg;
851 ilf_count[ilf]++;
852 continue;
853 }
854 }
855
856 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
857 if(access(ParsedOutputLog,R_OK) != 0) {
858 my_mkdir(ParsedOutputLog);
859 }
860 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
861 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
862 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
863 exit(EXIT_FAILURE);
864 }
865 fputs("*** SARG Log ***\n",fp_log);
866 }
867
868 recs2++;
869 if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
870 double perc = recs2 * 100. / recs1 ;
871 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
872 putchar('\r');
873 fflush (stdout);
874 OutputNonZero = REPORT_EVERY_X_LINES ;
875 }
876 if(blen < 58) continue;
877 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
878 if(strstr(linebuf,"logfile turned over") != 0) continue;
879 if(linebuf[0] == ' ') continue;
880
881 // exclude_string
882 if(ExcludeString[0] != '\0') {
883 bool exstring=false;
884 getword_start(&gwarea,ExcludeString);
885 while(strchr(gwarea.current,':') != 0) {
886 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
887 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
888 exit(EXIT_FAILURE);
889 }
890 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
891 exstring=true;
892 break;
893 }
894 }
895 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
896 exstring=true;
897 if(exstring) continue;
898 }
899
900 totregsl++;
901 if(debugm)
902 printf("BUF=%s\n",linebuf);
903
904 t=NULL;
905 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
906 getword_start(&gwarea,linebuf);
907 if (getword(data,sizeof(data),&gwarea,' ')<0) {
908 debuga(_("Maybe you have a broken time in your access.log file\n"));
909 exit(EXIT_FAILURE);
910 }
911 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
912 strcpy(ip,data);
913 strcpy(elap,"0");
914 if(squid24) {
915 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
916 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
917 exit(EXIT_FAILURE);
918 }
919 } else {
920 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
921 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
922 exit(EXIT_FAILURE);
923 }
924 }
925 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
926 getword(fun,sizeof(fun),&gwarea,' ')<0) {
927 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
928 exit(EXIT_FAILURE);
929 }
930 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
931 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
932 exit(EXIT_FAILURE);
933 }
934 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
935 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
936 exit(EXIT_FAILURE);
937 }
938 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
939 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
940 exit(EXIT_FAILURE);
941 }
942 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
943 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
944 exit(EXIT_FAILURE);
945 }
946 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
947 if (getword(code,sizeof(code),&gwarea,' ')<0) {
948 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
949 exit(EXIT_FAILURE);
950 }
951 } else {
952 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
953 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
954 exit(EXIT_FAILURE);
955 }
956 }
957
958 if ((str = strchr(code, ':')) != NULL)
959 *str = '/';
960
961 if(strcmp(tam,"\0") == 0)
962 strcpy(tam,"0");
963
964 ilf=ILF_Common;
965 ilf_count[ilf]++;
966
967 getword_start(&gwarea,data+1);
968 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
969 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
970 exit(EXIT_FAILURE);
971 }
972 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
973 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
974 exit(EXIT_FAILURE);
975 }
976 getword_start(&gwarea,data);
977 if (getword_atoll(&iday,&gwarea,'/')<0){
978 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
979 exit(EXIT_FAILURE);
980 }
981 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
982 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
983 exit(EXIT_FAILURE);
984 }
985 if (getword_atoll(&iyear,&gwarea,'/')<0){
986 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
987 exit(EXIT_FAILURE);
988 }
989
990 imonth=month2num(mes)+1;
991 idata=builddia(iday,imonth,iyear);
992 computedate(iyear,imonth,iday,&tt);
993 t=&tt;
994 }
995
996 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
997 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
998 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
999 exit(EXIT_FAILURE);
1000 }
1001 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
1002 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
1003 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
1004 exit(EXIT_FAILURE);
1005 }
1006 if(strlen(elap) < 1) continue;
1007 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
1008 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
1009 exit(EXIT_FAILURE);
1010 }
1011 if (getword(code,sizeof(code),&gwarea,' ')<0){
1012 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
1013 exit(EXIT_FAILURE);
1014 }
1015 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
1016 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
1017 exit(EXIT_FAILURE);
1018 }
1019 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
1020 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
1021 exit(EXIT_FAILURE);
1022 }
1023 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
1024 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
1025 exit(EXIT_FAILURE);
1026 }
1027 if (getword(user,sizeof(user),&gwarea,' ')<0){
1028 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1029 exit(EXIT_FAILURE);
1030 }
1031 ilf=ILF_Squid;
1032 ilf_count[ilf]++;
1033
1034 tnum=atoi(data);
1035 t=localtime(&tnum);
1036 if (t == NULL) {
1037 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1038 exit(EXIT_FAILURE);
1039 }
1040
1041 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1042
1043 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1044 }
1045 }
1046 if (ilf==ILF_Sarg) {
1047 getword_start(&gwarea,linebuf);
1048 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1049 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1050 exit(EXIT_FAILURE);
1051 }
1052 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1053 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1054 exit(EXIT_FAILURE);
1055 }
1056 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1057 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1058 exit(EXIT_FAILURE);
1059 }
1060 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1061 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1062 exit(EXIT_FAILURE);
1063 }
1064 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1065 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1066 exit(EXIT_FAILURE);
1067 }
1068 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1069 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1070 exit(EXIT_FAILURE);
1071 }
1072 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1073 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1074 exit(EXIT_FAILURE);
1075 }
1076 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1077 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1078 exit(EXIT_FAILURE);
1079 }
1080 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1081 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1082 exit(EXIT_FAILURE);
1083 }
1084 getword_start(&gwarea,data);
1085 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
1086 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1087 exit(EXIT_FAILURE);
1088 }
1089 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
1090 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1091 exit(EXIT_FAILURE);
1092 }
1093 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1094 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1095 exit(EXIT_FAILURE);
1096 }
1097 idata=builddia(iday,imonth,iyear);
1098 computedate(iyear,imonth,iday,&tt);
1099 t=&tt;
1100 }
1101 if (ilf==ILF_Isa) {
1102 if (linebuf[0] == '#') {
1103 int ncols,cols[ISACOL_Last];
1104
1105 fixendofline(linebuf);
1106 getword_start(&gwarea,linebuf);
1107 // remove the #Fields: column at the beginning of the line
1108 if (getword_skip(1000,&gwarea,' ')<0){
1109 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1110 exit(EXIT_FAILURE);
1111 }
1112 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1113 ncols=0;
1114 while(gwarea.current[0] != '\0') {
1115 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1116 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1117 exit(EXIT_FAILURE);
1118 }
1119 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1120 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1121 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1122 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1123 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1124 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1125 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1126 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1127 ncols++;
1128 }
1129 if (cols[ISACOL_Ip]>=0) {
1130 isa_ncols=ncols;
1131 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1132 isa_cols[ncols]=cols[ncols];
1133 }
1134 continue;
1135 }
1136 if (!isa_ncols) continue;
1137 getword_start(&gwarea,linebuf);
1138 for (x=0 ; x<isa_ncols ; x++) {
1139 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1140 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1141 exit(EXIT_FAILURE);
1142 }
1143 if (x==isa_cols[ISACOL_Ip]) {
1144 if (strlen(str)>=sizeof(ip)) {
1145 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1146 exit(EXIT_FAILURE);
1147 }
1148 strcpy(ip,str);
1149 } else if (x==isa_cols[ISACOL_UserName]) {
1150 if (strlen(str)>=sizeof(user)) {
1151 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1152 exit(EXIT_FAILURE);
1153 }
1154 strcpy(user,str);
1155 } else if (x==isa_cols[ISACOL_Date]) {
1156 if (strlen(str)>=sizeof(data)) {
1157 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1158 exit(EXIT_FAILURE);
1159 }
1160 strcpy(data,str);
1161 } else if (x==isa_cols[ISACOL_Time]) {
1162 if (strlen(str)>=sizeof(hora)) {
1163 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1164 exit(EXIT_FAILURE);
1165 }
1166 strcpy(hora,str);
1167 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1168 if (strlen(str)>=sizeof(elap)) {
1169 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1170 exit(EXIT_FAILURE);
1171 }
1172 strcpy(elap,str);
1173 } else if (x==isa_cols[ISACOL_Bytes]) {
1174 if (strlen(str)>=sizeof(tam)) {
1175 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1176 exit(EXIT_FAILURE);
1177 }
1178 strcpy(tam,str);
1179 } else if (x==isa_cols[ISACOL_Uri]) {
1180 url=str;
1181 } else if (x==isa_cols[ISACOL_Status]) {
1182 if (strlen(str)>=sizeof(code)) {
1183 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1184 exit(EXIT_FAILURE);
1185 }
1186 strcpy(code,str);
1187 }
1188 }
1189
1190 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1191 sprintf(val1,"DENIED/%s",code);
1192 strcpy(code,val1);
1193 }
1194 getword_start(&gwarea,data);
1195 if (getword_atoll(&iyear,&gwarea,'-')<0){
1196 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1197 exit(EXIT_FAILURE);
1198 }
1199 if (getword_atoll(&imonth,&gwarea,'-')<0){
1200 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1201 exit(EXIT_FAILURE);
1202 }
1203 if (getword_atoll(&iday,&gwarea,'\0')<0){
1204 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1205 exit(EXIT_FAILURE);
1206 }
1207
1208 idata=builddia(iday,imonth,iyear);
1209 computedate(iyear,imonth,iday,&tt);
1210 t=&tt;
1211 }
1212 if (t==NULL) {
1213 debuga(_("Unknown input log file format\n"));
1214 break;
1215 }
1216
1217 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1218 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1219
1220 if(debugm)
1221 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1222
1223 if(date[0] != '\0'){
1224 if(idata < dfrom || idata > duntil) continue;
1225 }
1226
1227 // Record only hours usage which is required
1228 if (t) {
1229 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1230 sizeof( int ), compar ) == NULL )
1231 continue;
1232
1233 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1234 sizeof( int ), compar ) == NULL )
1235 continue;
1236 }
1237
1238
1239 if(strlen(user) > MAX_USER_LEN) {
1240 if (debugm) printf(_("User ID too long: %s\n"),user);
1241 totregsx++;
1242 continue;
1243 }
1244
1245 // include_users
1246 if(IncludeUsers[0] != '\0') {
1247 sprintf(val1,":%s:",user);
1248 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1249 continue;
1250 }
1251
1252 if(vercode(code)) {
1253 if (debugm) printf(_("Excluded code: %s\n"),code);
1254 totregsx++;
1255 continue;
1256 }
1257
1258 if(testvaliduserchar(user))
1259 continue;
1260
1261 #if 0
1262 if((str = strstr(user,"%20")) != NULL) {
1263 /*
1264 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1265 the side effect is to truncate the name at the first space and merge the reports
1266 of people whose name is identical up to the first space.
1267
1268 The old code used to truncate the user name at the first % if a %20 was
1269 found anywhere in the string. That means the string could be truncated
1270 at the wrong place if another % occured before the %20. This new code should
1271 avoid that problem and only truncate at the space. There is no bug
1272 report indicating that anybody noticed this.
1273 */
1274 *str='\0';
1275 }
1276
1277 /*
1278 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1279 found in the user name.
1280 */
1281 while((str = strstr(user,"%5c")) != NULL) {
1282 *str='.';
1283 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1284 }
1285 #endif
1286
1287 urly=url;
1288
1289 if(ilf!=ILF_Sarg) {
1290 /*
1291 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1292 a downloaded file.
1293 */
1294 download_flag=is_download_suffix(url);
1295 if (download_flag) {
1296 download_url=url;
1297 download_count++;
1298 }
1299 } else
1300 download_flag=false;
1301
1302 // remove any protocol:// at the beginning of the URL
1303 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1304 int i;
1305
1306 str+=2;
1307 for (i=0 ; str[i] ; i++)
1308 url[i]=str[i];
1309 url[i]='\0';
1310 }
1311
1312 if(!LongUrl) {
1313 url_hostname(url,hostname,sizeof(hostname));
1314 url=hostname;
1315 }
1316
1317 if(url[0] == '\0') continue;
1318
1319 if(addr[0] != '\0'){
1320 if(strcmp(addr,ip)!=0) continue;
1321 }
1322 if(fhost) {
1323 if(!vhexclude(url)) {
1324 if (debugm) printf(_("Excluded site: %s\n"),url);
1325 totregsx++;
1326 continue;
1327 }
1328 }
1329
1330 if(hm >= 0 && hmf >= 0) {
1331 hmr=t->tm_hour*100+t->tm_min;
1332 if(hmr < hm || hmr > hmf) continue;
1333 }
1334
1335 if(site[0] != '\0'){
1336 if(strstr(url,site)==0) continue;
1337 }
1338
1339 if(UserIp) {
1340 strcpy(user,ip);
1341 id_is_ip=true;
1342 } else {
1343 id_is_ip=false;
1344 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1345 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1346 strcpy(user,ip);
1347 id_is_ip=true;
1348 }
1349 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1350 continue;
1351 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1352 strcpy(user,"everybody");
1353 } else {
1354 strlow(user);
1355 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1356 if((str = strchr(user,'_')) != 0) {
1357 strcpy(warea,str+1);
1358 strcpy(user,warea);
1359 }
1360 if((str = strchr(user,'+')) != 0) {
1361 strcpy(warea,str+1);
1362 strcpy(user,warea);
1363 }
1364 }
1365 }
1366 }
1367
1368 if(us[0] != '\0'){
1369 if(strcmp(user,us)!=0) continue;
1370 }
1371
1372 if(puser) {
1373 sprintf(wuser,":%s:",user);
1374 if(strstr(userfile, wuser) == 0)
1375 continue;
1376 }
1377
1378 if(fuser) {
1379 if(!vuexclude(user)) {
1380 if (debugm) printf(_("Excluded user: %s\n"),user);
1381 totregsx++;
1382 continue;
1383 }
1384 }
1385
1386 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1387 continue;
1388
1389 if(max_elapsed) {
1390 if(atol(elap)>max_elapsed) {
1391 elap[0]='0';
1392 elap[1]='\0';
1393 }
1394 }
1395
1396 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1397 fixendofline(str);
1398 sprintf(smartfilter,"\"%s\"",str+1);
1399 } else sprintf(smartfilter,"\"\"");
1400
1401 nopen=0;
1402 prev_ufile=NULL;
1403 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1404 prev_ufile=ufile;
1405 if (ufile->file) nopen++;
1406 }
1407 if (!ufile) {
1408 ufile=malloc(sizeof(*ufile));
1409 if (!ufile) {
1410 debuga(_("Not enough memory to store the user %s\n"),user);
1411 exit(EXIT_FAILURE);
1412 }
1413 memset(ufile,0,sizeof(*ufile));
1414 ufile->next=first_user_file;
1415 first_user_file=ufile;
1416 uinfo=userinfo_create(user);
1417 ufile->user=uinfo;
1418 uinfo->id_is_ip=id_is_ip;
1419 } else {
1420 if (prev_ufile) {
1421 prev_ufile->next=ufile->next;
1422 ufile->next=first_user_file;
1423 first_user_file=ufile;
1424 }
1425 }
1426
1427 if (ufile->file==NULL) {
1428 if (nopen>=maxopenfiles) {
1429 x=0;
1430 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1431 if (ufile1->file!=NULL) {
1432 if (x>=maxopenfiles) {
1433 if (fclose(ufile1->file)==EOF) {
1434 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1435 exit(EXIT_FAILURE);
1436 }
1437 ufile1->file=NULL;
1438 }
1439 x++;
1440 }
1441 }
1442 }
1443 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1444 debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
1445 exit(EXIT_FAILURE);
1446 }
1447 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1448 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1449 exit (1);
1450 }
1451 }
1452
1453 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1454 if ( fp_Write_User )
1455 fclose( fp_Write_User ) ;
1456 sprintf (tmp3, "%s/%s.unsort", tmp, user);
1457
1458 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1459 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1460 exit (1);
1461 }
1462 strcpy( sz_Last_User , user ) ;
1463 }*/
1464 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1465 debuga(_("Write error in the log file of user %s\n"),user);
1466 exit(EXIT_FAILURE);
1467 }
1468
1469 if(fp_log && ilf!=ILF_Sarg)
1470 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1471
1472 totregsg++;
1473
1474 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1475 ndownload = 1;
1476
1477 if ( ! fp_Download_Unsort ) {
1478 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1479 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
1480 exit (1);
1481 }
1482 }
1483 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1484 }
1485
1486 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1487 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1488 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1489 denied_count++;
1490 }
1491 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1492 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1493 authfail_count++;
1494 }
1495 }
1496
1497 if (ilf!=ILF_Sarg) {
1498 if(!totper || idata<mindate){
1499 mindate=idata;
1500 memcpy(&period.start,t,sizeof(*t));
1501 strcpy(start_hour,tbuf2);
1502 }
1503 if (!totper || idata>maxdate) {
1504 maxdate=idata;
1505 memcpy(&period.end,t,sizeof(*t));
1506 }
1507 totper=true;
1508 }
1509
1510 if(debugm){
1511 printf("IP=\t%s\n",ip);
1512 printf("USER=\t%s\n",user);
1513 printf("ELAP=\t%s\n",elap);
1514 printf("DATE=\t%s\n",dia);
1515 printf("TIME=\t%s\n",hora);
1516 printf("FUNC=\t%s\n",fun);
1517 printf("URL=\t%s\n",url);
1518 printf("CODE=\t%s\n",code);
1519 printf("LEN=\t%s\n",tam);
1520 }
1521 }
1522 if (!from_stdin) {
1523 if (from_pipe)
1524 pclose(fp_in);
1525 else {
1526 fclose(fp_in);
1527 if( ShowReadStatistics )
1528 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1529 }
1530 }
1531 }
1532
1533 if (debug)
1534 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1535
1536 longline_destroy(&line);
1537 if ( fp_Download_Unsort )
1538 fclose (fp_Download_Unsort);
1539
1540 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1541 ufile1=ufile->next;
1542 if (ufile->file!=NULL) fclose(ufile->file);
1543 free(ufile);
1544 }
1545
1546 free_download();
1547 free_excludecodes();
1548 free_exclude();
1549
1550 if(debug) {
1551 int totalcount=0;
1552
1553 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1554
1555 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1556 debuga(_("Log with mixed records format (squid and common log)\n"));
1557
1558 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1559 debuga(_("Common log format\n"));
1560
1561 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1562 debuga(_("Squid log format\n"));
1563
1564 if(ilf_count[ILF_Sarg]>0)
1565 debuga(_("Sarg log format\n"));
1566
1567 if(totalcount==0 && totregsg)
1568 debuga(_("Log with invalid format\n"));
1569 }
1570
1571 if(!totregsg){
1572 debuga(_("No records found\n"));
1573 debuga(_("End\n"));
1574 if(fp_denied) fclose(fp_denied);
1575 if(fp_authfail) fclose(fp_authfail);
1576 userinfo_free();
1577 if(userfile) free(userfile);
1578 close_usertab();
1579 exit(EXIT_SUCCESS);
1580 }
1581
1582 if (date[0]!='\0') {
1583 char date0[30], date1[30];
1584
1585 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1586 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1587 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1588 getperiod_fromrange(&period,dfrom,duntil);
1589 }
1590 if (getperiod_buildtext(&period)<0) {
1591 debuga(_("Failed to build the string representation of the date range\n"));
1592 exit(EXIT_FAILURE);
1593 }
1594
1595 if(debugz){
1596 debugaz("data",dia);
1597 debugaz("period",period.text);
1598 }
1599
1600 if(debug)
1601 debuga(_("Period: %s\n"),period.text);
1602
1603 // fclose(fp_ou);
1604 if(fp_denied)
1605 fclose(fp_denied);
1606 if(fp_authfail)
1607 fclose(fp_authfail);
1608
1609 if(fp_log != NULL) {
1610 fclose(fp_log);
1611 strcpy(end_hour,tbuf2);
1612 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1613 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1614 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1615 if (rename(arq_log,val4)) {
1616 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1617 } else {
1618 strcpy(arq_log,val4);
1619
1620 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1621 /*
1622 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1623 necessary around the command name, put them in the configuration file.
1624 */
1625 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1626 cstatus=system(val1);
1627 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1628 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1629 debuga(_("command: %s\n"),val1);
1630 exit(EXIT_FAILURE);
1631 }
1632 }
1633 }
1634 if(debug)
1635 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1636 }
1637
1638 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1639 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1640 cstatus=system(csort);
1641 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1642 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1643 debuga(_("sort command: %s\n"),csort);
1644 exit(EXIT_FAILURE);
1645 }
1646 unlink(denied_unsort);
1647 }
1648
1649 sort_users_log(tmp, debug);
1650
1651 if(DataFile[0] != '\0')
1652 data_file(tmp);
1653 else
1654 gerarel();
1655
1656 if((ReportType & REPORT_TYPE_DENIED) != 0)
1657 unlink(denied_sort);
1658
1659 if(strcmp(tmp,"/tmp") != 0) {
1660 unlinkdir(tmp,0);
1661 }
1662
1663 userinfo_free();
1664 if(userfile)
1665 free(userfile);
1666 close_usertab();
1667
1668 if(debug)
1669 debuga(_("End\n"));
1670
1671 exit(EXIT_SUCCESS);
1672
1673 }
1674
1675
1676 static void getusers(const char *pwdfile, int debug)
1677 {
1678
1679 FILE *fp_usr;
1680 char buf[255];
1681 char *str;
1682 long int nreg=0;
1683
1684 if(debug)
1685 debuga(_("Loading password file from %s\n"),pwdfile);
1686
1687 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1688 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1689 exit(EXIT_FAILURE);
1690 }
1691
1692 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1693 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1694 exit(EXIT_FAILURE);
1695 }
1696 nreg = ftell(fp_usr);
1697 if (nreg<0) {
1698 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1699 exit(EXIT_FAILURE);
1700 }
1701 nreg = nreg+5000;
1702 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1703 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1704 exit(EXIT_FAILURE);
1705 }
1706
1707 if((userfile=(char *) malloc(nreg))==NULL){
1708 debuga(_("malloc error (%ld)\n"),nreg);
1709 exit(EXIT_FAILURE);
1710 }
1711
1712 bzero(userfile,nreg);
1713 strcpy(userfile,":");
1714
1715 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1716 str=strchr(buf,':');
1717 if (!str) {
1718 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1719 exit(EXIT_FAILURE);
1720 }
1721 str[1]='\0';
1722 strcat(userfile,buf);
1723 }
1724
1725 fclose(fp_usr);
1726
1727 return;
1728 }