]> git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Fix the validation of the sarg log file name.
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #define REPORT_EVERY_X_LINES 5000 /* main() reloads its OutputNonZero countdown with this value, so the "reading: x%" progress line is refreshed once per this many log lines */
31 #define MAX_OPEN_USER_FILES 10 /* value main() assigns to maxopenfiles; presumably the cap on simultaneously open per-user files -- TODO confirm where maxopenfiles is used */
32
33 struct userfilestruct /* node of a singly linked list pairing a user record with a stdio stream; main() holds the list head in first_user_file */
34 {
35 struct userfilestruct *next; /* following node in the list; presumably NULL at the tail -- TODO confirm */
36 struct userinfostruct *user; /* user record this node belongs to */
37 FILE *file; /* stream tied to that user; NOTE(review): looks like the per-user temporary file kept open while reading the log -- confirm */
38 };
39
40 /*@null@*/static char *userfile=NULL; /* file-private string pointer, NULL until assigned; the leading annotation looks like a static-checker "may be null" marker. NOTE(review): presumably filled in by getusers() -- confirm */
41
42 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; /* initialised to the 7 weekday numbers 0..6; the trailing 7 is presumably the entry count -- verify against the numlist definition */
43 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
44 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; /* initialised to the 24 hour numbers 0..23; the trailing 24 is presumably the entry count -- verify against the numlist definition */
45
46 static void getusers(const char *pwdfile, int debug); /* forward declaration; body is outside this excerpt. main() calls it with PasswdFile and the debug level when PasswdFile is readable */
47
48 int main(int argc,char *argv[])
49 {
50 enum isa_col_id {
51 ISACOL_Ip,
52 ISACOL_UserName,
53 ISACOL_Date,
54 ISACOL_Time,
55 ISACOL_TimeTaken,
56 ISACOL_Bytes,
57 ISACOL_Uri,
58 ISACOL_Status,
59 ISACOL_Last //last entry of the list !
60 };
61 enum InputLogFormat {
62 ILF_Unknown,
63 ILF_Squid,
64 ILF_Common,
65 ILF_Sarg,
66 ILF_Isa,
67 ILF_Last //last entry of the list !
68 };
69
70 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
71
72 char sz_Download_Unsort[ 20000 ] ;
73 FILE * fp_Download_Unsort = NULL ;
74
75 extern int optind;
76 extern int optopt;
77 extern char *optarg;
78
79 char data[255];
80 char elap[255];
81 char ip[MAXLEN];
82 char tam[255];
83 char fun[MAXLEN];
84 char wuser[MAXLEN];
85 char smartfilter[MAXLEN];
86 char dia[128];
87 char mes[30];
88 char ano[30];
89 char hora[30];
90 char wtemp[MAXLEN];
91 char wtemp2[255];
92 char date[255];
93 char arq[255];
94 char arq_log[255];
95 char hm[15], hmf[15], hmr[15];
96 int chm=0;
97 char uagent[MAXLEN];
98 char hexclude[MAXLEN];
99 char csort[MAXLEN];
100 int cstatus;
101 char tbuf2[128];
102 char zip[20];
103 char *str;
104 char tmp2[MAXLEN];
105 char tmp3[MAXLEN];
106 char denied_unsort[MAXLEN];
107 char denied_sort[MAXLEN];
108 char authfail_unsort[MAXLEN];
109 char start_hour[128];
110 char end_hour[128];
111 char *linebuf;
112 char hostname[512];
113 char *url;
114 char *urly;
115 char user[MAX_USER_LEN];
116 enum InputLogFormat ilf;
117 int ilf_count[ILF_Last];
118 int ch;
119 int x;
120 int errflg=0;
121 int puser=0;
122 bool fhost=false;
123 bool dns=false;
124 bool fuser=false;
125 int idata=0;
126 int mindate=0;
127 int maxdate=0;
128 int iarq=0;
129 int isa_ncols=0,isa_cols[ISACOL_Last];
130 bool from_stdin;
131 bool from_pipe;
132 int blen;
133 int maxopenfiles;
134 int nopen;
135 bool id_is_ip;
136 long totregsl=0;
137 long totregsg=0;
138 long totregsx=0;
139 bool totper=false;
140 long int max_elapsed=0;
141 long long int iyear, imonth, iday;
142 bool realt;
143 bool userip;
144 struct tm tt;
145 struct tm *t;
146 unsigned long recs1=0UL;
147 unsigned long recs2=0UL;
148 int OutputNonZero = REPORT_EVERY_X_LINES ;
149 bool download_flag=false;
150 char *download_url=NULL;
151 struct getwordstruct gwarea;
152 longline line;
153 time_t tnum;
154 struct stat logstat;
155 struct userinfostruct *uinfo;
156 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
157
158 #ifdef HAVE_LOCALE_H
159 setlocale(LC_TIME,"");
160 #endif
161
162 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
163 if (!setlocale (LC_ALL, "")) {
164 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
165 exit(EXIT_FAILURE);
166 }
167 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
168 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
169 exit(EXIT_FAILURE);
170 }
171 if (!textdomain (PACKAGE_NAME)) {
172 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
173 exit(EXIT_FAILURE);
174 }
175 #endif //ENABLE_NLS
176
177 BgImage[0]='\0';
178 LogoImage[0]='\0';
179 LogoText[0]='\0';
180 PasswdFile[0]='\0';
181 OutputEmail[0]='\0';
182 UserAgentLog[0]='\0';
183 ExcludeHosts[0]='\0';
184 ExcludeUsers[0]='\0';
185 ConfigFile[0]='\0';
186 code[0]='\0';
187 LastLog=0;
188 ReportType=0UL;
189 UserTabFile[0]='\0';
190 BlockIt[0]='\0';
191 ExternalCSSFile[0]='\0';
192 RedirectorLogFormat[0]='\0';
193 NRedirectorLogs=0;
194 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
195
196 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
197 strcpy(GraphDaysBytesBarColor,"orange");
198 strcpy(BgColor,"#ffffff");
199 strcpy(TxColor,"#000000");
200 strcpy(TxBgColor,"lavender");
201 strcpy(TiColor,"darkblue");
202 strcpy(Width,"80");
203 strcpy(Height,"45");
204 strcpy(LogoTextColor,"#000000");
205 strcpy(HeaderColor,"darkblue");
206 strcpy(HeaderBgColor,"#dddddd");
207 strcpy(LogoTextColor,"#006699");
208 strcpy(FontSize,"9px");
209 strcpy(TempDir,"/tmp");
210 strcpy(OutputDir,"/var/www/html/squid-reports");
211 Ip2Name=false;
212 strcpy(DateFormat,"u");
213 OverwriteReport=false;
214 RemoveTempFiles=true;
215 strcpy(ReplaceIndex,"index.html");
216 Index=INDEX_YES;
217 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
218 UseComma=0;
219 strcpy(MailUtility,"mailx");
220 TopSitesNum=100;
221 TopUsersNum=0;
222 UserIp=0;
223 strcpy(TopuserSortField,"BYTES");
224 strcpy(UserSortField,"BYTES");
225 strcpy(TopuserSortOrder,"reverse");
226 strcpy(UserSortOrder,"reverse");
227 strcpy(TopsitesSortField,"CONNECT");
228 strcpy(TopsitesSortType,"D");
229 LongUrl=0;
230 strcpy(FontFace,"Verdana,Tahoma,Arial");
231 datetimeby=DATETIME_ELAP;
232 strcpy(CharSet,"ISO-8859-1");
233 Privacy=0;
234 strcpy(PrivacyString,"***.***.***.***");
235 strcpy(PrivacyStringColor,"blue");
236 SuccessfulMsg=true;
237 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
238 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
239 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
240 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
241 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
242 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
243 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
244 strcpy(DataFileDelimiter,";");
245 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
246 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
247 ShowReadStatistics=true;
248 strcpy(IndexSortOrder,"D");
249 ShowSargInfo=true;
250 ShowSargLogo=true;
251 strcpy(ParsedOutputLog,"no");
252 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
253 DisplayedValues=DISPLAY_ABBREV;
254 strcpy(HeaderFontSize,"9px");
255 strcpy(TitleFontSize,"11px");
256 strcpy(AuthUserTemplateFile,"sarg_htaccess");
257 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
258 Graphs=true;
259 #if defined(FONTDIR)
260 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
261 #else
262 GraphFont[0]='\0';
263 #endif
264 strcpy(Ulimit,"20000");
265 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
266 IndexTree=INDEX_TREE_FILE;
267 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
268 strcpy(RealtimeUnauthRec,"show");
269 SquidguardIgnoreDate=0;
270 DansguardianIgnoreDate=0;
271 DataFileUrl=DATAFILEURL_IP;
272 strcpy(MaxElapsed,"28800000");
273 BytesInSitesUsersReport=0;
274 UserAuthentication=0;
275 strcpy(LDAPHost,"127.0.0.1");
276 LDAPPort=389;
277 LDAPProtocolVersion=3;
278 LDAPBindDN[0]='\0';
279 LDAPBindPW[0]='\0';
280 LDAPBaseSearch[0]='\0';
281 strcpy(LDAPFilterSearch, "uid=%s");
282 strcpy(LDAPTargetAttr, "cn");
283
284 dia[0]='\0';
285 mes[0]='\0';
286 ano[0]='\0';
287 hora[0]='\0';
288 tmp[0]='\0';
289 tmp2[0]='\0';
290 tmp3[0]='\0';
291 wtemp[0]='\0';
292 wtemp2[0]='\0';
293 us[0]='\0';
294 date[0]='\0';
295 df[0]='\0';
296 uagent[0]='\0';
297 hexclude[0]='\0';
298 addr[0]='\0';
299 hm[0]='\0';
300 hmf[0]='\0';
301 site[0]='\0';
302 outdir[0]='\0';
303 elap[0]='\0';
304 email[0]='\0';
305 zip[0]='\0';
306 UserInvalidChar[0]='\0';
307 DataFile[0]='\0';
308 SquidGuardConf[0]='\0';
309 DansGuardianConf[0]='\0';
310 start_hour[0]='\0';
311 end_hour[0]='\0';
312
313 denied_count=0;
314 download_count=0;
315 authfail_count=0;
316 dansguardian_count=0;
317 squidguard_count=0;
318 useragent_count=0;
319 DeniedReportLimit=10;
320 AuthfailReportLimit=10;
321 DansGuardianReportLimit=10;
322 SquidGuardReportLimit=10;
323 DownloadReportLimit=50;
324 UserReportLimit=0;
325 debug=0;
326 debugz=0;
327 debugm=0;
328 iprel=false;
329 userip=false;
330 realt=false;
331 realtime_refresh=3;
332 realtime_access_log_lines=1000;
333 cost=0.01;
334 nocost=50000000;
335 ndownload=0;
336 squid24=false;
337 dfrom=0;
338 duntil=0;
339
340 bzero(IncludeUsers, sizeof(IncludeUsers));
341 bzero(ExcludeString, sizeof(ExcludeString));
342 first_user_file=NULL;
343 memset(&period,0,sizeof(period));
344
345 NAccessLog=0;
346 for(x=0; x<MAXLOGS; x++)
347 AccessLog[x][0]='\0';
348 AccessLogFromCmdLine=0;
349 RedirectorLogFromCmdLine=0;
350
351 strcpy(Title,_("Squid User Access Report"));
352
353 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
354 switch(ch)
355 {
356 case 'a':
357 strcpy(addr,optarg);
358 break;
359 case 'b':
360 strcpy(uagent,optarg);
361 break;
362 case 'c':
363 strcpy(hexclude,optarg);
364 break;
365 case 'd':
366 strncpy(date,optarg,sizeof(date)-1);
367 date[sizeof(date)-1]='\0';
368 date_from(date, &dfrom, &duntil);
369 break;
370 case 'e':
371 strcpy(email,optarg);
372 break;
373 case 'f':
374 strcpy(ConfigFile,optarg);
375 break;
376 case 'g':
377 strcpy(df,optarg);
378 break;
379 case 'h':
380 usage(argv[0]);
381 exit(EXIT_SUCCESS);
382 case 'i':
383 iprel=true;
384 break;
385 case 'l':
386 if (NAccessLog>=MAXLOGS) {
387 debuga(_("Too many log files passed on command line with option -l.\n"));
388 exit(EXIT_FAILURE);
389 }
390 if (strlen(optarg)>=MAX_LOG_FILELEN) {
391 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
392 exit(EXIT_FAILURE);
393 }
394 strcpy(AccessLog[NAccessLog],optarg);
395 NAccessLog++;
396 AccessLogFromCmdLine++;
397 break;
398 case 'L':
399 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
400 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
401 exit(EXIT_FAILURE);
402 }
403 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
404 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
405 exit(EXIT_FAILURE);
406 }
407 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
408 NRedirectorLogs++;
409 RedirectorLogFromCmdLine++;
410 break;
411 case 'm':
412 debugm++;
413 break;
414 case 'n':
415 dns=true;
416 break;
417 case 'o':
418 strcpy(outdir,optarg);
419 break;
420 case 'p':
421 userip=true;
422 break;
423 case 'r':
424 realt=true;
425 break;
426 case 's':
427 strcpy(site,optarg);
428 break;
429 case 't':
430 {
431 int h,m;
432
433 if(strstr(optarg,"-") == 0) {
434 strcpy(hm,optarg);
435 strcpy(hmf,optarg);
436 } else {
437 getword_start(&gwarea,optarg);
438 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
439 debuga(_("The time range passed on the command line with option -t is invalid\n"));
440 exit(EXIT_FAILURE);
441 }
442 }
443 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
444 debuga(_("Time period must be MM or MM:SS. Exit\n"));
445 exit(EXIT_FAILURE);
446 }
447 sprintf(hm,"%02d%02d",h,m);
448 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
449 debuga(_("Time period must be MM or MM:SS. Exit\n"));
450 exit(EXIT_FAILURE);
451 }
452 sprintf(hmf,"%02d%02d",h,m);
453 break;
454 }
455 case 'u':
456 strcpy(us,optarg);
457 break;
458 case 'v':
459 version();
460 break;
461 case 'w':
462 strcpy(tmp,optarg);
463 break;
464 case 'x':
465 debug++;
466 break;
467 case 'y':
468 langcode++;
469 break;
470 case 'z':
471 debugz++;
472 break;
473 case ':':
474 debuga(_("Option -%c require an argument\n"),optopt);
475 errflg++;
476 break;
477 case '?':
478 usage(argv[0]);
479 exit(EXIT_FAILURE);
480 }
481
482 }
483
484 if (errflg>0) {
485 usage(argv[0]);
486 exit(2);
487 }
488
489 if(debug) debuga(_("Init\n"));
490
491 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
492 if(access(ConfigFile, R_OK) != 0) {
493 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
494 exit(EXIT_FAILURE);
495 }
496
497 if(access(ConfigFile, R_OK) == 0)
498 getconf();
499
500 if(userip) UserIp=true;
501
502 if(dns) Ip2Name=true;
503
504 if(realt) {
505 realtime();
506 exit(EXIT_SUCCESS);
507 }
508
509 if(IndexTree == INDEX_TREE_FILE)
510 strcpy(ImageFile,"../images");
511 else
512 strcpy(ImageFile,"../../../images");
513
514 dataonly=0;
515 if(DataFile[0] != '\0')
516 dataonly++;
517
518 if(NAccessLog == 0) {
519 strcpy(AccessLog[0],"/var/log/squid/access.log");
520 NAccessLog++;
521 }
522
523 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
524 convlog(AccessLog[0], df, dfrom, duntil);
525 exit(EXIT_SUCCESS);
526 }
527
528 if(strcmp(site,"plit") == 0) {
529 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
530 exit(EXIT_SUCCESS);
531 }
532
533 load_excludecodes(ExcludeCodes);
534
535 if(access(PasswdFile, R_OK) == 0) {
536 getusers(PasswdFile,debug);
537 puser++;
538 }
539
540 if(hexclude[0] == '\0')
541 strcpy(hexclude,ExcludeHosts);
542 if(hexclude[0] != '\0') {
543 gethexclude(hexclude,debug);
544 fhost=true;
545 }
546
547 if(ReportType == 0) {
548 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
549 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
550 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
551 }
552
553 if(access(ExcludeUsers, R_OK) == 0) {
554 getuexclude(ExcludeUsers,debug);
555 fuser=true;
556 }
557
558 indexonly=0;
559 if(fuser) {
560 if(is_indexonly())
561 indexonly++;
562 }
563 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
564 if(Index == INDEX_ONLY) indexonly++;
565
566 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
567
568 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
569 strcat(outdir,"/");
570
571 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
572
573 if(tmp[0] == '\0') strcpy(tmp,TempDir);
574 else strcpy(TempDir,tmp);
575
576 if(df[0] == '\0') strcpy(df,DateFormat);
577 else strcpy(DateFormat,df);
578
579 if(df[0] == '\0') {
580 strcpy(df,"u");
581 strcpy(DateFormat,"u");
582 }
583 if (df[0]=='w')
584 IndexTree=INDEX_TREE_FILE;
585
586 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
587
588 strcpy(tmp2,tmp);
589
590 if(email[0] != '\0') {
591 snprintf(wtemp2,sizeof(wtemp2),"%s/sarg",tmp2);
592 my_mkdir(wtemp2);
593 strcat(tmp2,"/sarg");
594 strcpy(outdir,tmp2);
595 strcat(outdir,"/");
596 }
597
598 strcat(tmp2,"/sarg.log");
599
600 sprintf(tmp3,"%s/sarg",tmp);
601 if(access(tmp3, R_OK) == 0) {
602 unlinkdir(tmp3,1);
603 }
604 my_mkdir(tmp3);
605 strcpy(denied_unsort,tmp3);
606 strcpy(denied_sort,tmp3);
607 strcpy(authfail_unsort,tmp3);
608 strcat(denied_unsort,"/denied.log.unsort");
609 strcat(denied_sort,"/denied.log");
610 strcat(authfail_unsort,"/authfail.log.unsort");
611
612 if(debug) {
613 debuga(_("Parameters:\n"));
614 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
615 debuga(_(" Useragent log (-b) = %s\n"),uagent);
616 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
617 debuga(_(" Date from-until (-d) = %s\n"),date);
618 debuga(_(" Email address to send reports (-e) = %s\n"),email);
619 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
620 if(strcmp(df,"e") == 0)
621 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
622 if(strcmp(df,"u") == 0)
623 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
624 if(strcmp(df,"w") == 0)
625 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
626 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
627 for (iarq=0 ; iarq<NAccessLog ; iarq++)
628 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
629 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
630 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
631 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
632 debuga(_(" Output dir (-o) = %s\n"),outdir);
633 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
634 debuga(_(" Accessed site (-s) = %s\n"),site);
635 debuga(_(" Time (-t) = %s\n"),hm);
636 debuga(_(" User (-u) = %s\n"),us);
637 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
638 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
639 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
640 debuga("\n");
641 }
642
643 if(debugm) {
644 printf(_("Parameters:\n"));
645 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
646 printf(_(" Useragent log (-b) = %s\n"),uagent);
647 printf(_(" Exclude file (-c) = %s\n"),hexclude);
648 printf(_(" Date from-until (-d) = %s\n"),date);
649 printf(_(" Email address to send reports (-e) = %s\n"),email);
650 printf(_(" Config file (-f) = %s\n"),ConfigFile);
651 if(strcmp(df,"e") == 0)
652 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
653 if(strcmp(df,"u") == 0)
654 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
655 if(strcmp(df,"w") == 0)
656 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
657 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
658 for (iarq=0 ; iarq<NAccessLog ; iarq++)
659 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
660 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
661 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
662 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
663 printf(_(" Output dir (-o) = %s\n"),outdir);
664 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
665 printf(_(" Accessed site (-s) = %s\n"),site);
666 printf(_(" Time (-t) = %s\n"),hm);
667 printf(_(" User (-u) = %s\n"),us);
668 printf(_(" Temporary dir (-w) = %s\n"),tmp);
669 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
670 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
671 printf(_("sarg version: %s\n"),VERSION);
672 }
673
674 if(debug)
675 debuga(_("sarg version: %s\n"),VERSION);
676
677 maxopenfiles=MAX_OPEN_USER_FILES;
678 #ifdef HAVE_RLIM_T
679 if (Ulimit[0] != '\0') {
680 struct rlimit rl;
681 long l1, l2;
682 int rc=0;
683
684 #if defined(RLIMIT_NOFILE)
685 getrlimit (RLIMIT_NOFILE, &rl);
686 #elif defined(RLIMIT_OFILE)
687 getrlimit (RLIMIT_OFILE, &rl);
688 #else
689 #warning "No rlimit resource for the number of open files"
690 #endif
691 l1 = rl.rlim_cur;
692 l2 = rl.rlim_max;
693
694 rl.rlim_cur = atol(Ulimit);
695 rl.rlim_max = atol(Ulimit);
696 #if defined(RLIMIT_NOFILE)
697 rc=setrlimit (RLIMIT_NOFILE, &rl);
698 #elif defined(RLIMIT_OFILE)
699 rc=setrlimit (RLIMIT_OFILE, &rl);
700 #else
701 #warning "No rlimit resource for the number of open files"
702 #endif
703 if(rc == -1) {
704 debuga(_("setrlimit error - %s\n"),strerror(errno));
705 }
706
707 if(debug)
708 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
709 }
710 #endif
711
712 init_usertab(UserTabFile);
713
714 if ((line=longline_create())==NULL) {
715 debuga(_("Not enough memory to read a log file\n"));
716 exit(EXIT_FAILURE);
717 }
718
719 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/sarg/download.unsort", tmp);
720
721 if(DataFile[0]=='\0') {
722 if((ReportType & REPORT_TYPE_DENIED) != 0) {
723 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
724 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
725 exit(EXIT_FAILURE);
726 }
727 }
728
729 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
730 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
731 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
732 exit(EXIT_FAILURE);
733 }
734 }
735 }
736
737 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
738 strcpy(arq,AccessLog[iarq]);
739
740 strcpy(arqtt,arq);
741
742 if(strcmp(arq,"-")==0) {
743 if(debug)
744 debuga(_("Reading access log file: from stdin\n"));
745 fp_in=stdin;
746 from_stdin=true;
747 } else {
748 if (date[0]!='\0') {
749 if (stat(arq,&logstat)!=0) {
750 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
751 } else {
752 struct tm *logtime=localtime(&logstat.st_mtime);
753 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
754 debuga(_("Ignoring old log file %s\n"),arq);
755 continue;
756 }
757 }
758 }
759 fp_in=decomp(arq,&from_pipe);
760 if(fp_in==NULL) {
761 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
762 exit(EXIT_FAILURE);
763 }
764 if(debug) debuga(_("Reading access log file: %s\n"),arq);
765 from_stdin=false;
766 }
767 ilf=ILF_Unknown;
768 download_flag=false;
769 // pre-read the file only if we have to show stats
770 if(ShowReadStatistics && !from_stdin && !from_pipe) {
771 size_t nread,i;
772 bool skipcr=false;
773 char tmp4[MAXLEN];
774
775 recs1=0UL;
776 recs2=0UL;
777
778 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
779 for (i=0 ; i<nread ; i++)
780 if (skipcr) {
781 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
782 skipcr=false;
783 }
784 } else {
785 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
786 skipcr=true;
787 recs1++;
788 }
789 }
790 }
791 rewind(fp_in);
792 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
793 putchar('\r');
794 fflush( stdout ) ;
795 }
796
797 longline_reset(line);
798
799 while ((linebuf=longline_read(fp_in,line))!=NULL) {
800 blen=strlen(linebuf);
801
802 if (ilf==ILF_Unknown) {
803 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
804 fixendofline(linebuf);
805 if (debug)
806 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
807 ilf=ILF_Isa;
808 ilf_count[ilf]++;
809 continue;
810 }
811
812 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
813 if (getperiod_fromsarglog(arqtt,&period)<0) {
814 debuga(_("The name of the file is invalid: %s\n"),arq);
815 exit(EXIT_FAILURE);
816 }
817 ilf=ILF_Sarg;
818 ilf_count[ilf]++;
819 continue;
820 }
821 }
822
823 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
824 if(access(ParsedOutputLog,R_OK) != 0) {
825 my_mkdir(ParsedOutputLog);
826 }
827 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
828 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
829 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
830 exit(EXIT_FAILURE);
831 }
832 fputs("*** SARG Log ***\n",fp_log);
833 }
834
835 recs2++;
836 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
837 double perc = recs2 * 100. / recs1 ;
838 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
839 putchar('\r');
840 fflush (stdout);
841 OutputNonZero = REPORT_EVERY_X_LINES ;
842 }
843 if(blen < 58) continue;
844 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
845 if(strstr(linebuf,"logfile turned over") != 0) continue;
846 if(linebuf[0] == ' ') continue;
847
848 // exclude_string
849 if(ExcludeString[0] != '\0') {
850 bool exstring=false;
851 getword_start(&gwarea,ExcludeString);
852 while(strchr(gwarea.current,':') != 0) {
853 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
854 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
855 exit(EXIT_FAILURE);
856 }
857 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
858 exstring=true;
859 break;
860 }
861 }
862 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
863 exstring=true;
864 if(exstring) continue;
865 }
866
867 totregsl++;
868 if(debugm)
869 printf("BUF=%s\n",linebuf);
870
871 t=NULL;
872 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
873 getword_start(&gwarea,linebuf);
874 if (getword(data,sizeof(data),&gwarea,' ')<0) {
875 debuga(_("Maybe you have a broken time in your access.log file\n"));
876 exit(EXIT_FAILURE);
877 }
878 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
879 strcpy(ip,data);
880 strcpy(elap,"0");
881 if(squid24) {
882 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
883 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
884 exit(EXIT_FAILURE);
885 }
886 } else {
887 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
888 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
889 exit(EXIT_FAILURE);
890 }
891 }
892 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
893 getword(fun,sizeof(fun),&gwarea,' ')<0) {
894 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
895 exit(EXIT_FAILURE);
896 }
897 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
898 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
899 exit(EXIT_FAILURE);
900 }
901 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
902 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
903 exit(EXIT_FAILURE);
904 }
905 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
906 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
907 exit(EXIT_FAILURE);
908 }
909 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
910 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
911 exit(EXIT_FAILURE);
912 }
913 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
914 if (getword(code,sizeof(code),&gwarea,' ')<0) {
915 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
916 exit(EXIT_FAILURE);
917 }
918 } else {
919 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
920 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
921 exit(EXIT_FAILURE);
922 }
923 }
924
925 if ((str = strchr(code, ':')) != NULL)
926 *str = '/';
927
928 if(strcmp(tam,"\0") == 0)
929 strcpy(tam,"0");
930
931 ilf=ILF_Common;
932 ilf_count[ilf]++;
933
934 getword_start(&gwarea,data+1);
935 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
936 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
937 exit(EXIT_FAILURE);
938 }
939 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
940 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
941 exit(EXIT_FAILURE);
942 }
943 getword_start(&gwarea,data);
944 if (getword_atoll(&iday,&gwarea,'/')<0){
945 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
946 exit(EXIT_FAILURE);
947 }
948 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
949 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
950 exit(EXIT_FAILURE);
951 }
952 if (getword_atoll(&iyear,&gwarea,'/')<0){
953 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
954 exit(EXIT_FAILURE);
955 }
956
957 imonth=month2num(mes)+1;
958 idata=builddia(iday,imonth,iyear);
959 computedate(iyear,imonth,iday,&tt);
960 t=&tt;
961 }
962
963 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
964 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
965 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
966 exit(EXIT_FAILURE);
967 }
968 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
969 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
970 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
971 exit(EXIT_FAILURE);
972 }
973 if(strlen(elap) < 1) continue;
974 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
975 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
976 exit(EXIT_FAILURE);
977 }
978 if (getword(code,sizeof(code),&gwarea,' ')<0){
979 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
980 exit(EXIT_FAILURE);
981 }
982 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
983 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
984 exit(EXIT_FAILURE);
985 }
986 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
987 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
988 exit(EXIT_FAILURE);
989 }
990 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
991 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
992 exit(EXIT_FAILURE);
993 }
994 if (getword(user,sizeof(user),&gwarea,' ')<0){
995 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
996 exit(EXIT_FAILURE);
997 }
998 ilf=ILF_Squid;
999 ilf_count[ilf]++;
1000
1001 tnum=atoi(data);
1002 t=localtime(&tnum);
1003 if (t == NULL) {
1004 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1005 exit(EXIT_FAILURE);
1006 }
1007
1008 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1009
1010 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1011 }
1012 }
1013 if (ilf==ILF_Sarg) {
1014 getword_start(&gwarea,linebuf);
1015 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1016 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1017 exit(EXIT_FAILURE);
1018 }
1019 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1020 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1021 exit(EXIT_FAILURE);
1022 }
1023 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1024 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1025 exit(EXIT_FAILURE);
1026 }
1027 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1028 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1029 exit(EXIT_FAILURE);
1030 }
1031 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1032 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1033 exit(EXIT_FAILURE);
1034 }
1035 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1036 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1037 exit(EXIT_FAILURE);
1038 }
1039 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1040 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1041 exit(EXIT_FAILURE);
1042 }
1043 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1044 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1045 exit(EXIT_FAILURE);
1046 }
1047 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1048 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1049 exit(EXIT_FAILURE);
1050 }
1051 getword_start(&gwarea,data);
1052 if(strcmp(df,"u") == 0) {
1053 if (getword_atoll(&imonth,&gwarea,'/')<0){
1054 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1055 exit(EXIT_FAILURE);
1056 }
1057 if (getword_atoll(&iday,&gwarea,'/')<0){
1058 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1059 exit(EXIT_FAILURE);
1060 }
1061 } else {
1062 if (getword_atoll(&iday,&gwarea,'/')<0){
1063 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1064 exit(EXIT_FAILURE);
1065 }
1066 if (getword_atoll(&imonth,&gwarea,'/')<0){
1067 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1068 exit(EXIT_FAILURE);
1069 }
1070 }
1071 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1072 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1073 exit(EXIT_FAILURE);
1074 }
1075 idata=builddia(iday,imonth,iyear);
1076 computedate(iyear,imonth,iday,&tt);
1077 t=&tt;
1078 }
1079 if (ilf==ILF_Isa) {
1080 if (linebuf[0] == '#') {
1081 int ncols,cols[ISACOL_Last];
1082
1083 fixendofline(linebuf);
1084 getword_start(&gwarea,linebuf);
1085 // remove the #Fields: column at the beginning of the line
1086 if (getword_skip(1000,&gwarea,' ')<0){
1087 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1088 exit(EXIT_FAILURE);
1089 }
1090 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1091 ncols=0;
1092 while(gwarea.current[0] != '\0') {
1093 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1094 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1095 exit(EXIT_FAILURE);
1096 }
1097 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1098 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1099 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1100 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1101 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1102 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1103 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1104 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1105 ncols++;
1106 }
1107 if (cols[ISACOL_Ip]>=0) {
1108 isa_ncols=ncols;
1109 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1110 isa_cols[ncols]=cols[ncols];
1111 }
1112 continue;
1113 }
1114 if (!isa_ncols) continue;
1115 getword_start(&gwarea,linebuf);
1116 for (x=0 ; x<isa_ncols ; x++) {
1117 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1118 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1119 exit(EXIT_FAILURE);
1120 }
1121 if (x==isa_cols[ISACOL_Ip]) {
1122 if (strlen(str)>=sizeof(ip)) {
1123 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1124 exit(EXIT_FAILURE);
1125 }
1126 strcpy(ip,str);
1127 } else if (x==isa_cols[ISACOL_UserName]) {
1128 if (strlen(str)>=sizeof(user)) {
1129 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1130 exit(EXIT_FAILURE);
1131 }
1132 strcpy(user,str);
1133 } else if (x==isa_cols[ISACOL_Date]) {
1134 if (strlen(str)>=sizeof(data)) {
1135 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1136 exit(EXIT_FAILURE);
1137 }
1138 strcpy(data,str);
1139 } else if (x==isa_cols[ISACOL_Time]) {
1140 if (strlen(str)>=sizeof(hora)) {
1141 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1142 exit(EXIT_FAILURE);
1143 }
1144 strcpy(hora,str);
1145 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1146 if (strlen(str)>=sizeof(elap)) {
1147 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1148 exit(EXIT_FAILURE);
1149 }
1150 strcpy(elap,str);
1151 } else if (x==isa_cols[ISACOL_Bytes]) {
1152 if (strlen(str)>=sizeof(tam)) {
1153 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1154 exit(EXIT_FAILURE);
1155 }
1156 strcpy(tam,str);
1157 } else if (x==isa_cols[ISACOL_Uri]) {
1158 url=str;
1159 } else if (x==isa_cols[ISACOL_Status]) {
1160 if (strlen(str)>=sizeof(code)) {
1161 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1162 exit(EXIT_FAILURE);
1163 }
1164 strcpy(code,str);
1165 }
1166 }
1167
1168 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1169 sprintf(val1,"DENIED/%s",code);
1170 strcpy(code,val1);
1171 }
1172 getword_start(&gwarea,data);
1173 if (getword_atoll(&iyear,&gwarea,'-')<0){
1174 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1175 exit(EXIT_FAILURE);
1176 }
1177 if (getword_atoll(&imonth,&gwarea,'-')<0){
1178 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1179 exit(EXIT_FAILURE);
1180 }
1181 if (getword_atoll(&iday,&gwarea,'\0')<0){
1182 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1183 exit(EXIT_FAILURE);
1184 }
1185
1186 idata=builddia(iday,imonth,iyear);
1187 computedate(iyear,imonth,iday,&tt);
1188 t=&tt;
1189 }
1190 if (t==NULL) {
1191 debuga(_("Unknown input log file format\n"));
1192 break;
1193 }
1194
1195 if(strncmp(df,"u",1)==0)
1196 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
1197 else
1198 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1199 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1200
1201 if(debugm)
1202 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1203
1204 if(date[0] != '\0'){
1205 if(idata < dfrom || idata > duntil) continue;
1206 }
1207
1208 // Record only hours usage which is required
1209 if (t) {
1210 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1211 sizeof( int ), compar ) == NULL )
1212 continue;
1213
1214 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1215 sizeof( int ), compar ) == NULL )
1216 continue;
1217 }
1218
1219
1220 if(strlen(user) > MAX_USER_LEN) {
1221 if (debugm) printf(_("User ID too long: %s\n"),user);
1222 totregsx++;
1223 continue;
1224 }
1225
1226 // include_users
1227 if(IncludeUsers[0] != '\0') {
1228 sprintf(val1,":%s:",user);
1229 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1230 continue;
1231 }
1232
1233 if(vercode(code)) {
1234 if (debugm) printf(_("Excluded code: %s\n"),code);
1235 totregsx++;
1236 continue;
1237 }
1238
1239 if(testvaliduserchar(user))
1240 continue;
1241
1242 #if 0
1243 if((str = strstr(user,"%20")) != NULL) {
1244 /*
1245 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1246 the side effect is to truncate the name at the first space and merge the reports
1247 of people whose name is identical up to the first space.
1248
1249 The old code used to truncate the user name at the first % if a %20 was
1250 found anywhere in the string. That means the string could be truncated
1251 at the wrong place if another % occurred before the %20. This new code should
1252 avoid that problem and only truncate at the space. There is no bug
1253 report indicating that anybody noticed this.
1254 */
1255 *str='\0';
1256 }
1257
1258 /*
1259 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1260 found in the user name.
1261 */
1262 while((str = strstr(user,"%5c")) != NULL) {
1263 *str='.';
1264 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1265 }
1266 #endif
1267
1268 urly=url;
1269
1270 if(ilf!=ILF_Sarg) {
1271 /*
1272 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1273 a downloaded file.
1274 */
1275 download_flag=is_download_suffix(url);
1276 if (download_flag) {
1277 download_url=url;
1278 download_count++;
1279 }
1280 } else
1281 download_flag=false;
1282
1283 // remove any protocol:// at the beginning of the URL
1284 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1285 int i;
1286
1287 str+=2;
1288 for (i=0 ; str[i] ; i++)
1289 url[i]=str[i];
1290 url[i]='\0';
1291 }
1292
1293 if(!LongUrl) {
1294 url_hostname(url,hostname,sizeof(hostname));
1295 url=hostname;
1296 }
1297
1298 if(url[0] == '\0') continue;
1299
1300 if(addr[0] != '\0'){
1301 if(strcmp(addr,ip)!=0) continue;
1302 }
1303 if(fhost) {
1304 if(!vhexclude(url)) {
1305 if (debugm) printf(_("Excluded site: %s\n"),url);
1306 totregsx++;
1307 continue;
1308 }
1309 }
1310
1311 if(hm[0] != '\0') {
1312 hmr[0]='\0';
1313 chm++;
1314 getword_start(&gwarea,hora);
1315 while(chm) {
1316 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1317 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1318 exit(EXIT_FAILURE);
1319 }
1320 strncat(hmr,warea,2);
1321 chm--;
1322 }
1323 strncat(hmr,gwarea.current,2);
1324
1325 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1326 }
1327
1328 if(site[0] != '\0'){
1329 if(strstr(url,site)==0) continue;
1330 }
1331
1332 if(UserIp) {
1333 strcpy(user,ip);
1334 id_is_ip=true;
1335 } else {
1336 id_is_ip=false;
1337 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1338 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1339 strcpy(user,ip);
1340 id_is_ip=true;
1341 }
1342 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1343 continue;
1344 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1345 strcpy(user,"everybody");
1346 } else {
1347 strlow(user);
1348 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1349 if((str = strchr(user,'_')) != 0) {
1350 strcpy(warea,str+1);
1351 strcpy(user,warea);
1352 }
1353 if((str = strchr(user,'+')) != 0) {
1354 strcpy(warea,str+1);
1355 strcpy(user,warea);
1356 }
1357 }
1358 }
1359 }
1360
1361 if(us[0] != '\0'){
1362 if(strcmp(user,us)!=0) continue;
1363 }
1364
1365 if(puser) {
1366 sprintf(wuser,":%s:",user);
1367 if(strstr(userfile, wuser) == 0)
1368 continue;
1369 }
1370
1371 if(fuser) {
1372 if(!vuexclude(user)) {
1373 if (debugm) printf(_("Excluded user: %s\n"),user);
1374 totregsx++;
1375 continue;
1376 }
1377 }
1378
1379 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1380 continue;
1381
1382 if(max_elapsed) {
1383 if(atol(elap)>max_elapsed) {
1384 elap[0]='0';
1385 elap[1]='\0';
1386 }
1387 }
1388
1389 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1390 fixendofline(str);
1391 sprintf(smartfilter,"\"%s\"",str+1);
1392 } else sprintf(smartfilter,"\"\"");
1393
1394 nopen=0;
1395 prev_ufile=NULL;
1396 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1397 prev_ufile=ufile;
1398 if (ufile->file) nopen++;
1399 }
1400 if (!ufile) {
1401 ufile=malloc(sizeof(*ufile));
1402 if (!ufile) {
1403 debuga(_("Not enough memory to store the user %s\n"),user);
1404 exit(EXIT_FAILURE);
1405 }
1406 memset(ufile,0,sizeof(*ufile));
1407 ufile->next=first_user_file;
1408 first_user_file=ufile;
1409 uinfo=userinfo_create(user);
1410 ufile->user=uinfo;
1411 uinfo->id_is_ip=id_is_ip;
1412 } else {
1413 if (prev_ufile) {
1414 prev_ufile->next=ufile->next;
1415 ufile->next=first_user_file;
1416 first_user_file=ufile;
1417 }
1418 }
1419
1420 if (ufile->file==NULL) {
1421 if (nopen>=maxopenfiles) {
1422 x=0;
1423 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1424 if (ufile1->file!=NULL) {
1425 if (x>=maxopenfiles) {
1426 if (fclose(ufile1->file)==EOF) {
1427 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1428 exit(EXIT_FAILURE);
1429 }
1430 ufile1->file=NULL;
1431 }
1432 x++;
1433 }
1434 }
1435 }
1436 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1437 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1438 exit(EXIT_FAILURE);
1439 }
1440 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1441 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1442 exit (1);
1443 }
1444 }
1445
1446 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1447 if ( fp_Write_User )
1448 fclose( fp_Write_User ) ;
1449 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1450
1451 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1452 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1453 exit (1);
1454 }
1455 strcpy( sz_Last_User , user ) ;
1456 }*/
1457 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1458 debuga(_("Write error in the log file of user %s\n"),user);
1459 exit(EXIT_FAILURE);
1460 }
1461
1462 if(fp_log && ilf!=ILF_Sarg)
1463 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1464
1465 totregsg++;
1466
1467 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1468 ndownload = 1;
1469
1470 if ( ! fp_Download_Unsort ) {
1471 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1472 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1473 exit (1);
1474 }
1475 }
1476 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1477 }
1478
1479 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1480 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1481 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1482 denied_count++;
1483 }
1484 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1485 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1486 authfail_count++;
1487 }
1488 }
1489
1490 if (ilf!=ILF_Sarg) {
1491 if(!totper || idata<mindate){
1492 mindate=idata;
1493 memcpy(&period.start,t,sizeof(*t));
1494 strcpy(start_hour,tbuf2);
1495 }
1496 if (!totper || idata>maxdate) {
1497 maxdate=idata;
1498 memcpy(&period.end,t,sizeof(*t));
1499 }
1500 totper=true;
1501 }
1502
1503 if(debugm){
1504 printf("IP=\t%s\n",ip);
1505 printf("USER=\t%s\n",user);
1506 printf("ELAP=\t%s\n",elap);
1507 printf("DATE=\t%s\n",dia);
1508 printf("TIME=\t%s\n",hora);
1509 printf("FUNC=\t%s\n",fun);
1510 printf("URL=\t%s\n",url);
1511 printf("CODE=\t%s\n",code);
1512 printf("LEN=\t%s\n",tam);
1513 }
1514 }
1515 if (!from_stdin) {
1516 if (from_pipe)
1517 pclose(fp_in);
1518 else
1519 fclose(fp_in);
1520 if( ShowReadStatistics )
1521 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1522 }
1523 }
1524
1525 if (debug)
1526 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1527
1528 longline_destroy(&line);
1529 if ( fp_Download_Unsort )
1530 fclose (fp_Download_Unsort);
1531
1532 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1533 ufile1=ufile->next;
1534 if (ufile->file!=NULL) fclose(ufile->file);
1535 free(ufile);
1536 }
1537
1538 free_download();
1539 free_excludecodes();
1540 free_exclude();
1541
1542 if(debug) {
1543 int totalcount=0;
1544
1545 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1546
1547 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1548 debuga(_("Log with mixed records format (squid and common log)\n"));
1549
1550 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1551 debuga(_("Common log format\n"));
1552
1553 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1554 debuga(_("Squid log format\n"));
1555
1556 if(ilf_count[ILF_Sarg]>0)
1557 debuga(_("Sarg log format\n"));
1558
1559 if(totalcount==0 && totregsg)
1560 debuga(_("Log with invalid format\n"));
1561 }
1562
1563 if(!totregsg){
1564 debuga(_("No records found\n"));
1565 debuga(_("End\n"));
1566 if(fp_denied) fclose(fp_denied);
1567 if(fp_authfail) fclose(fp_authfail);
1568 userinfo_free();
1569 if(userfile) free(userfile);
1570 close_usertab();
1571 exit(EXIT_SUCCESS);
1572 }
1573
1574 if (date[0]!='\0') {
1575 char date0[30], date1[30];
1576
1577 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1578 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1579 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1580 getperiod_fromrange(&period,dfrom,duntil);
1581 }
1582 if (getperiod_buildtext(&period)<0) {
1583 debuga(_("Failed to build the string representation of the date range\n"));
1584 exit(EXIT_FAILURE);
1585 }
1586
1587 if(debugz){
1588 debugaz("data",dia);
1589 debugaz("period",period.text);
1590 }
1591
1592 if(debug)
1593 debuga(_("Period: %s\n"),period.text);
1594
1595 // fclose(fp_ou);
1596 if(fp_denied)
1597 fclose(fp_denied);
1598 if(fp_authfail)
1599 fclose(fp_authfail);
1600
1601 if(fp_log != NULL) {
1602 fclose(fp_log);
1603 strcpy(end_hour,tbuf2);
1604 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1605 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1606 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1607 if (rename(arq_log,val4)) {
1608 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1609 } else {
1610 strcpy(arq_log,val4);
1611
1612 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1613 /*
1614 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1615 necessary around the command name, put them in the configuration file.
1616 */
1617 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1618 cstatus=system(val1);
1619 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1620 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1621 debuga(_("command: %s\n"),val1);
1622 exit(EXIT_FAILURE);
1623 }
1624 }
1625 }
1626 if(debug)
1627 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1628 }
1629
1630 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1631 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1632 cstatus=system(csort);
1633 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1634 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1635 debuga(_("sort command: %s\n"),csort);
1636 exit(EXIT_FAILURE);
1637 }
1638 unlink(denied_unsort);
1639 }
1640
1641 sort_users_log(tmp, debug);
1642
1643 if(DataFile[0] != '\0')
1644 data_file(tmp);
1645 else
1646 gerarel();
1647
1648 unlink(tmp2);
1649 if((ReportType & REPORT_TYPE_DENIED) != 0)
1650 unlink(denied_sort);
1651
1652 if(strcmp(tmp,"/tmp") != 0) {
1653 unlinkdir(tmp,0);
1654 }
1655
1656 userinfo_free();
1657 if(userfile)
1658 free(userfile);
1659 close_usertab();
1660
1661 if(debug)
1662 debuga(_("End\n"));
1663
1664 exit(EXIT_SUCCESS);
1665
1666 }
1667
1668
1669 static void getusers(const char *pwdfile, int debug)
1670 {
1671
1672 FILE *fp_usr;
1673 char buf[255];
1674 char *str;
1675 long int nreg=0;
1676
1677 if(debug)
1678 debuga(_("Loading password file from %s\n"),pwdfile);
1679
1680 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1681 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1682 exit(EXIT_FAILURE);
1683 }
1684
1685 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1686 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1687 exit(EXIT_FAILURE);
1688 }
1689 nreg = ftell(fp_usr);
1690 if (nreg<0) {
1691 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1692 exit(EXIT_FAILURE);
1693 }
1694 nreg = nreg+5000;
1695 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1696 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1697 exit(EXIT_FAILURE);
1698 }
1699
1700 if((userfile=(char *) malloc(nreg))==NULL){
1701 debuga(_("malloc error (%ld)\n"),nreg);
1702 exit(EXIT_FAILURE);
1703 }
1704
1705 bzero(userfile,nreg);
1706 strcpy(userfile,":");
1707
1708 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1709 str=strchr(buf,':');
1710 if (!str) {
1711 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1712 exit(EXIT_FAILURE);
1713 }
1714 str[1]='\0';
1715 strcat(userfile,buf);
1716 }
1717
1718 fclose(fp_usr);
1719
1720 return;
1721 }