git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Don't use string comparison to sort the top users, top sites and user lists.
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef HAVE_GETOPT_H
31 #include <getopt.h>
32 #endif
33
34 #define REPORT_EVERY_X_LINES 5000 /* number of log lines main() reads between two refreshes of its "Records in file ... reading" progress message */
35 #define MAX_OPEN_USER_FILES 10 /* initial value given to maxopenfiles in main(); presumably the cap on simultaneously open per-user files - confirm where maxopenfiles is consumed */
36
37 struct userfilestruct /* one entry pairing a user with a stdio stream; entries chain to each other through 'next' */
38 {
39 struct userfilestruct *next; /* following entry of the chain, same type as this one (main() declares first_user_file, presumably the head - confirm) */
40 struct userinfostruct *user; /* user this entry refers to; struct userinfostruct is declared outside this file section */
41 FILE *file; /* stream associated with that user - presumably the user's temporary output file; confirm against the code that opens it */
42 };
43
44 /*@null@*/static char *userfile=NULL; /* file-private pointer, starts out NULL (the leading annotation tells static checkers it may legitimately be NULL); its consumer is not visible in this part of the file */
45
46 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; /* default selection: all seven values 0..6 are listed; the trailing 7 matches the number of listed values (presumably the element count of numlist - confirm in its declaration) */
47 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
48 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; /* default selection: all twenty-four values 0..23 are listed, followed by 24, the number of listed values */
49
50 static void getusers(const char *pwdfile, int debug); /* forward declaration of a file-private helper defined later in the file; main() calls it with PasswdFile and the debug level once access() reports that file as readable */
51
52 int main(int argc,char *argv[])
53 {
54 enum isa_col_id {
55 ISACOL_Ip,
56 ISACOL_UserName,
57 ISACOL_Date,
58 ISACOL_Time,
59 ISACOL_TimeTaken,
60 ISACOL_Bytes,
61 ISACOL_Uri,
62 ISACOL_Status,
63 ISACOL_Last //last entry of the list !
64 };
65 enum InputLogFormat {
66 ILF_Unknown,
67 ILF_Squid,
68 ILF_Common,
69 ILF_Sarg,
70 ILF_Isa,
71 ILF_Last //last entry of the list !
72 };
73
74 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
75
76 char sz_Download_Unsort[ 20000 ] ;
77 FILE * fp_Download_Unsort = NULL ;
78
79 extern int optind;
80 extern int optopt;
81 extern char *optarg;
82
83 char data[255];
84 char elap[255];
85 char ip[MAXLEN];
86 char tam[255];
87 char fun[MAXLEN];
88 char wuser[MAXLEN];
89 char smartfilter[MAXLEN];
90 char dia[128];
91 char mes[30];
92 char ano[30];
93 char hora[30];
94 char wtemp[MAXLEN];
95 char date[255];
96 char arq[255];
97 char arq_log[255];
98 char hm[15], hmf[15], hmr[15];
99 int chm=0;
100 char uagent[MAXLEN];
101 char hexclude[MAXLEN];
102 char csort[MAXLEN];
103 int cstatus;
104 char tbuf2[128];
105 char zip[20];
106 char *str;
107 char tmp3[MAXLEN];
108 char denied_unsort[MAXLEN];
109 char denied_sort[MAXLEN];
110 char authfail_unsort[MAXLEN];
111 char start_hour[128];
112 char end_hour[128];
113 char *linebuf;
114 char hostname[512];
115 char *url;
116 char *urly;
117 char user[MAX_USER_LEN];
118 enum InputLogFormat ilf;
119 int ilf_count[ILF_Last];
120 int ch;
121 int x;
122 int errflg=0;
123 int puser=0;
124 bool fhost=false;
125 bool dns=false;
126 bool fuser=false;
127 int idata=0;
128 int mindate=0;
129 int maxdate=0;
130 int iarq=0;
131 int isa_ncols=0,isa_cols[ISACOL_Last];
132 bool from_stdin;
133 bool from_pipe;
134 int blen;
135 int maxopenfiles;
136 int nopen;
137 bool id_is_ip;
138 long totregsl=0;
139 long totregsg=0;
140 long totregsx=0;
141 bool totper=false;
142 long int max_elapsed=0;
143 long long int iyear, imonth, iday;
144 bool realt;
145 bool userip;
146 struct tm tt;
147 struct tm *t;
148 unsigned long recs1=0UL;
149 unsigned long recs2=0UL;
150 int OutputNonZero = REPORT_EVERY_X_LINES ;
151 bool download_flag=false;
152 char *download_url=NULL;
153 struct getwordstruct gwarea;
154 longline line;
155 time_t tnum;
156 struct stat logstat;
157 struct userinfostruct *uinfo;
158 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
159 static int split=0;
160 static int convert=0;
161 static int output_css=0;
162 int option_index;
163 static struct option long_options[]=
164 {
165 {"convert",no_argument,&convert,1},
166 {"css",no_argument,&output_css,1},
167 {"split",no_argument,&split,1},
168 {0,0,0,0}
169 };
170
171 #ifdef HAVE_LOCALE_H
172 setlocale(LC_TIME,"");
173 #endif
174
175 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
176 if (!setlocale (LC_ALL, "")) {
177 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
178 exit(EXIT_FAILURE);
179 }
180 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
181 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
182 exit(EXIT_FAILURE);
183 }
184 if (!textdomain (PACKAGE_NAME)) {
185 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
186 exit(EXIT_FAILURE);
187 }
188 #endif //ENABLE_NLS
189
190 BgImage[0]='\0';
191 LogoImage[0]='\0';
192 LogoText[0]='\0';
193 PasswdFile[0]='\0';
194 OutputEmail[0]='\0';
195 UserAgentLog[0]='\0';
196 ExcludeHosts[0]='\0';
197 ExcludeUsers[0]='\0';
198 ConfigFile[0]='\0';
199 code[0]='\0';
200 LastLog=0;
201 ReportType=0UL;
202 UserTabFile[0]='\0';
203 BlockIt[0]='\0';
204 ExternalCSSFile[0]='\0';
205 RedirectorLogFormat[0]='\0';
206 NRedirectorLogs=0;
207 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
208
209 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
210 strcpy(GraphDaysBytesBarColor,"orange");
211 strcpy(BgColor,"#ffffff");
212 strcpy(TxColor,"#000000");
213 strcpy(TxBgColor,"lavender");
214 strcpy(TiColor,"darkblue");
215 strcpy(Width,"80");
216 strcpy(Height,"45");
217 strcpy(LogoTextColor,"#000000");
218 strcpy(HeaderColor,"darkblue");
219 strcpy(HeaderBgColor,"#dddddd");
220 strcpy(LogoTextColor,"#006699");
221 strcpy(FontSize,"9px");
222 strcpy(TempDir,"/tmp");
223 strcpy(OutputDir,"/var/www/html/squid-reports");
224 Ip2Name=false;
225 strcpy(DateFormat,"u");
226 OverwriteReport=false;
227 RemoveTempFiles=true;
228 strcpy(ReplaceIndex,"index.html");
229 Index=INDEX_YES;
230 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
231 UseComma=0;
232 strcpy(MailUtility,"mailx");
233 TopSitesNum=100;
234 TopUsersNum=0;
235 UserIp=0;
236 TopuserSort=TOPUSER_SORT_BYTES | TOPUSER_SORT_REVERSE;
237 UserSort=USER_SORT_BYTES | USER_SORT_REVERSE;
238 TopsitesSort=TOPSITE_SORT_CONNECT | TOPSITE_SORT_REVERSE;
239 LongUrl=0;
240 strcpy(FontFace,"Verdana,Tahoma,Arial");
241 datetimeby=DATETIME_BYTE;
242 strcpy(CharSet,"ISO-8859-1");
243 Privacy=0;
244 strcpy(PrivacyString,"***.***.***.***");
245 strcpy(PrivacyStringColor,"blue");
246 SuccessfulMsg=true;
247 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
248 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
249 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
250 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
251 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
252 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
253 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
254 strcpy(DataFileDelimiter,";");
255 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
256 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
257 ShowReadStatistics=true;
258 strcpy(IndexSortOrder,"D");
259 ShowSargInfo=true;
260 ShowSargLogo=true;
261 strcpy(ParsedOutputLog,"no");
262 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
263 DisplayedValues=DISPLAY_ABBREV;
264 strcpy(HeaderFontSize,"9px");
265 strcpy(TitleFontSize,"11px");
266 strcpy(AuthUserTemplateFile,"sarg_htaccess");
267 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
268 Graphs=true;
269 #if defined(FONTDIR)
270 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
271 #else
272 GraphFont[0]='\0';
273 #endif
274 strcpy(Ulimit,"20000");
275 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
276 IndexTree=INDEX_TREE_FILE;
277 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
278 RealtimeUnauthRec=REALTIME_UNAUTH_REC_SHOW;
279 RedirectorFilterOutDate=true;
280 DansguardianFilterOutDate=true;
281 DataFileUrl=DATAFILEURL_IP;
282 strcpy(MaxElapsed,"28800000");
283 BytesInSitesUsersReport=0;
284 UserAuthentication=0;
285 strcpy(LDAPHost,"127.0.0.1");
286 LDAPPort=389;
287 LDAPProtocolVersion=3;
288 LDAPBindDN[0]='\0';
289 LDAPBindPW[0]='\0';
290 LDAPBaseSearch[0]='\0';
291 strcpy(LDAPFilterSearch, "uid=%s");
292 strcpy(LDAPTargetAttr, "cn");
293
294 dia[0]='\0';
295 mes[0]='\0';
296 ano[0]='\0';
297 hora[0]='\0';
298 tmp[0]='\0';
299 tmp3[0]='\0';
300 wtemp[0]='\0';
301 us[0]='\0';
302 date[0]='\0';
303 df[0]='\0';
304 uagent[0]='\0';
305 hexclude[0]='\0';
306 addr[0]='\0';
307 hm[0]='\0';
308 hmf[0]='\0';
309 site[0]='\0';
310 outdir[0]='\0';
311 elap[0]='\0';
312 email[0]='\0';
313 zip[0]='\0';
314 UserInvalidChar[0]='\0';
315 DataFile[0]='\0';
316 SquidGuardConf[0]='\0';
317 DansGuardianConf[0]='\0';
318 start_hour[0]='\0';
319 end_hour[0]='\0';
320
321 denied_count=0;
322 download_count=0;
323 authfail_count=0;
324 dansguardian_count=0;
325 squidguard_count=0;
326 useragent_count=0;
327 DeniedReportLimit=10;
328 AuthfailReportLimit=10;
329 DansGuardianReportLimit=10;
330 SquidGuardReportLimit=10;
331 DownloadReportLimit=50;
332 UserReportLimit=0;
333 debug=0;
334 debugz=0;
335 debugm=0;
336 iprel=false;
337 userip=false;
338 realt=false;
339 realtime_refresh=3;
340 realtime_access_log_lines=1000;
341 cost=0.01;
342 nocost=50000000;
343 ndownload=0;
344 squid24=false;
345 dfrom=0;
346 duntil=0;
347
348 bzero(IncludeUsers, sizeof(IncludeUsers));
349 bzero(ExcludeString, sizeof(ExcludeString));
350 first_user_file=NULL;
351 memset(&period,0,sizeof(period));
352
353 NAccessLog=0;
354 for(x=0; x<MAXLOGS; x++)
355 AccessLog[x][0]='\0';
356 AccessLogFromCmdLine=0;
357 RedirectorLogFromCmdLine=0;
358
359 strcpy(Title,_("Squid User Access Report"));
360
361 while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
362 switch(ch)
363 {
364 case 0:
365 break;
366 case 'a':
367 strcpy(addr,optarg);
368 break;
369 case 'b':
370 strcpy(uagent,optarg);
371 break;
372 case 'c':
373 strcpy(hexclude,optarg);
374 break;
375 case 'd':
376 strncpy(date,optarg,sizeof(date)-1);
377 date[sizeof(date)-1]='\0';
378 date_from(date, &dfrom, &duntil);
379 break;
380 case 'e':
381 strcpy(email,optarg);
382 break;
383 case 'f':
384 strcpy(ConfigFile,optarg);
385 break;
386 case 'g':
387 strcpy(df,optarg);
388 break;
389 case 'h':
390 usage(argv[0]);
391 exit(EXIT_SUCCESS);
392 case 'i':
393 iprel=true;
394 break;
395 case 'l':
396 if (NAccessLog>=MAXLOGS) {
397 debuga(_("Too many log files passed on command line with option -l.\n"));
398 exit(EXIT_FAILURE);
399 }
400 if (strlen(optarg)>=MAX_LOG_FILELEN) {
401 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
402 exit(EXIT_FAILURE);
403 }
404 strcpy(AccessLog[NAccessLog],optarg);
405 NAccessLog++;
406 AccessLogFromCmdLine++;
407 break;
408 case 'L':
409 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
410 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
411 exit(EXIT_FAILURE);
412 }
413 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
414 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
415 exit(EXIT_FAILURE);
416 }
417 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
418 NRedirectorLogs++;
419 RedirectorLogFromCmdLine++;
420 break;
421 case 'm':
422 debugm++;
423 break;
424 case 'n':
425 dns=true;
426 break;
427 case 'o':
428 strcpy(outdir,optarg);
429 break;
430 case 'p':
431 userip=true;
432 break;
433 case 'r':
434 realt=true;
435 break;
436 case 's':
437 strcpy(site,optarg);
438 break;
439 case 't':
440 {
441 int h,m;
442
443 if(strstr(optarg,"-") == 0) {
444 strcpy(hm,optarg);
445 strcpy(hmf,optarg);
446 } else {
447 getword_start(&gwarea,optarg);
448 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
449 debuga(_("The time range passed on the command line with option -t is invalid\n"));
450 exit(EXIT_FAILURE);
451 }
452 }
453 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
454 debuga(_("Time period must be MM or MM:SS. Exit\n"));
455 exit(EXIT_FAILURE);
456 }
457 sprintf(hm,"%02d%02d",h,m);
458 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
459 debuga(_("Time period must be MM or MM:SS. Exit\n"));
460 exit(EXIT_FAILURE);
461 }
462 sprintf(hmf,"%02d%02d",h,m);
463 break;
464 }
465 case 'u':
466 strcpy(us,optarg);
467 break;
468 case 'v':
469 version();
470 break;
471 case 'w':
472 strcpy(tmp,optarg);
473 break;
474 case 'x':
475 debug++;
476 break;
477 case 'y':
478 langcode++;
479 break;
480 case 'z':
481 debugz++;
482 break;
483 /*case ':':
484 debuga(_("Option -%c require an argument\n"),optopt);
485 errflg++;
486 break;*/
487 case '?':
488 usage(argv[0]);
489 exit(EXIT_FAILURE);
490 default:
491 abort();
492 }
493 }
494
495 if (errflg>0) {
496 usage(argv[0]);
497 exit(2);
498 }
499
500 if (optind<argc) {
501 for (iarq=optind ; iarq<argc ; iarq++) {
502 if (NAccessLog>=MAXLOGS) {
503 debuga(_("Too many log files passed on command line.\n"));
504 exit(EXIT_FAILURE);
505 }
506 if (strlen(argv[iarq])>=MAX_LOG_FILELEN) {
507 debuga(_("Log file name too long passed on command line: %s\n"),argv[iarq]);
508 exit(EXIT_FAILURE);
509 }
510 strcpy(AccessLog[NAccessLog],argv[iarq]);
511 NAccessLog++;
512 AccessLogFromCmdLine++;
513 }
514 }
515
516 if(debug) debuga(_("Init\n"));
517
518 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
519 if(access(ConfigFile, R_OK) != 0) {
520 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
521 exit(EXIT_FAILURE);
522 }
523
524 if(access(ConfigFile, R_OK) == 0)
525 getconf();
526
527 if(userip) UserIp=true;
528
529 if(dns) Ip2Name=true;
530
531 if(realt) {
532 realtime();
533 exit(EXIT_SUCCESS);
534 }
535
536 if(IndexTree == INDEX_TREE_FILE)
537 strcpy(ImageFile,"../images");
538 else
539 strcpy(ImageFile,"../../../images");
540
541 dataonly=0;
542 if(DataFile[0] != '\0')
543 dataonly++;
544
545 if(NAccessLog == 0) {
546 strcpy(AccessLog[0],"/var/log/squid/access.log");
547 NAccessLog++;
548 }
549
550 if(output_css) {
551 css_content(stdout);
552 exit(EXIT_SUCCESS);
553 }
554 if(split) {
555 for (iarq=0 ; iarq<NAccessLog ; iarq++)
556 splitlog(AccessLog[iarq], df, dfrom, duntil, convert);
557 exit(EXIT_SUCCESS);
558 }
559 if(convert) {
560 for (iarq=0 ; iarq<NAccessLog ; iarq++)
561 convlog(AccessLog[iarq], df, dfrom, duntil);
562 exit(EXIT_SUCCESS);
563 }
564
565 load_excludecodes(ExcludeCodes);
566
567 if(access(PasswdFile, R_OK) == 0) {
568 getusers(PasswdFile,debug);
569 puser++;
570 }
571
572 if(hexclude[0] == '\0')
573 strcpy(hexclude,ExcludeHosts);
574 if(hexclude[0] != '\0') {
575 gethexclude(hexclude,debug);
576 fhost=true;
577 }
578
579 if(ReportType == 0) {
580 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
581 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
582 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
583 }
584
585 if(access(ExcludeUsers, R_OK) == 0) {
586 getuexclude(ExcludeUsers,debug);
587 fuser=true;
588 }
589
590 indexonly=0;
591 if(fuser) {
592 if(is_indexonly())
593 indexonly++;
594 }
595 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
596 if(Index == INDEX_ONLY) indexonly++;
597
598 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
599
600 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
601 strcat(outdir,"/");
602
603 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
604
605 if(tmp[0] == '\0') strcpy(tmp,TempDir);
606 else strcpy(TempDir,tmp);
607 /*
608 For historical reasons, the temporary directory is the subdirectory "sarg" of the path
609 provided by the user.
610 */
611 strcat(tmp,"/sarg");
612
613 if (tmp[0]!='\0' && strncmp(outdir,tmp,strlen(tmp))==0) {
614 debuga(_("The output directory \"%s\" must be outside of the temporary directory \"%s\"\n"),outdir,tmp);
615 exit(EXIT_FAILURE);
616 }
617
618 if(df[0] == '\0') strcpy(df,DateFormat);
619 else strcpy(DateFormat,df);
620
621 if(df[0] == '\0') {
622 strcpy(df,"u");
623 strcpy(DateFormat,"u");
624 }
625 if (df[0]=='w')
626 IndexTree=INDEX_TREE_FILE;
627
628 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
629
630 if(email[0] != '\0') {
631 my_mkdir(tmp);
632 strcpy(outdir,tmp);
633 strcat(outdir,"/");
634 }
635
636 if(access(tmp, R_OK) == 0) {
637 unlinkdir(tmp,1);
638 }
639 my_mkdir(tmp);
640 snprintf(denied_unsort,sizeof(denied_unsort),"%s/denied.log.unsort",tmp);
641 snprintf(denied_sort,sizeof(denied_sort),"%s/denied.log",tmp);
642 snprintf(authfail_unsort,sizeof(authfail_unsort),"%s/authfail.log.unsort",tmp);
643
644 if(debug) {
645 debuga(_("Parameters:\n"));
646 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
647 debuga(_(" Useragent log (-b) = %s\n"),uagent);
648 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
649 debuga(_(" Date from-until (-d) = %s\n"),date);
650 debuga(_(" Email address to send reports (-e) = %s\n"),email);
651 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
652 if(strcmp(df,"e") == 0)
653 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
654 if(strcmp(df,"u") == 0)
655 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
656 if(strcmp(df,"w") == 0)
657 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
658 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
659 for (iarq=0 ; iarq<NAccessLog ; iarq++)
660 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
661 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
662 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
663 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
664 debuga(_(" Output dir (-o) = %s\n"),outdir);
665 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
666 debuga(_(" Accessed site (-s) = %s\n"),site);
667 debuga(_(" Time (-t) = %s\n"),hm);
668 debuga(_(" User (-u) = %s\n"),us);
669 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
670 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
671 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
672 debuga("\n");
673 }
674
675 if(debugm) {
676 printf(_("Parameters:\n"));
677 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
678 printf(_(" Useragent log (-b) = %s\n"),uagent);
679 printf(_(" Exclude file (-c) = %s\n"),hexclude);
680 printf(_(" Date from-until (-d) = %s\n"),date);
681 printf(_(" Email address to send reports (-e) = %s\n"),email);
682 printf(_(" Config file (-f) = %s\n"),ConfigFile);
683 if(strcmp(df,"e") == 0)
684 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
685 if(strcmp(df,"u") == 0)
686 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
687 if(strcmp(df,"w") == 0)
688 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
689 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
690 for (iarq=0 ; iarq<NAccessLog ; iarq++)
691 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
692 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
693 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
694 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
695 printf(_(" Output dir (-o) = %s\n"),outdir);
696 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
697 printf(_(" Accessed site (-s) = %s\n"),site);
698 printf(_(" Time (-t) = %s\n"),hm);
699 printf(_(" User (-u) = %s\n"),us);
700 printf(_(" Temporary dir (-w) = %s\n"),tmp);
701 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
702 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
703 printf(_("sarg version: %s\n"),VERSION);
704 }
705
706 if(debug)
707 debuga(_("sarg version: %s\n"),VERSION);
708
709 #ifdef ENABLE_DOUBLE_CHECK_DATA
710 debuga(_("Sarg compiled to report warnings if the output is inconsistent\n"));
711 #endif
712
713 maxopenfiles=MAX_OPEN_USER_FILES;
714 #ifdef HAVE_RLIM_T
715 if (Ulimit[0] != '\0') {
716 struct rlimit rl;
717 long l1, l2;
718 int rc=0;
719
720 #if defined(RLIMIT_NOFILE)
721 getrlimit (RLIMIT_NOFILE, &rl);
722 #elif defined(RLIMIT_OFILE)
723 getrlimit (RLIMIT_OFILE, &rl);
724 #else
725 #warning "No rlimit resource for the number of open files"
726 #endif
727 l1 = rl.rlim_cur;
728 l2 = rl.rlim_max;
729
730 rl.rlim_cur = atol(Ulimit);
731 rl.rlim_max = atol(Ulimit);
732 #if defined(RLIMIT_NOFILE)
733 rc=setrlimit (RLIMIT_NOFILE, &rl);
734 #elif defined(RLIMIT_OFILE)
735 rc=setrlimit (RLIMIT_OFILE, &rl);
736 #else
737 #warning "No rlimit resource for the number of open files"
738 #endif
739 if(rc == -1) {
740 debuga(_("setrlimit error - %s\n"),strerror(errno));
741 }
742
743 if(debug)
744 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
745 }
746 #endif
747
748 init_usertab(UserTabFile);
749
750 if ((line=longline_create())==NULL) {
751 debuga(_("Not enough memory to read a log file\n"));
752 exit(EXIT_FAILURE);
753 }
754
755 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.unsort", tmp);
756
757 if(DataFile[0]=='\0') {
758 if((ReportType & REPORT_TYPE_DENIED) != 0) {
759 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
760 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
761 exit(EXIT_FAILURE);
762 }
763 }
764
765 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
766 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
767 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
768 exit(EXIT_FAILURE);
769 }
770 }
771 }
772
773 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
774 strcpy(arq,AccessLog[iarq]);
775
776 strcpy(arqtt,arq);
777
778 if(strcmp(arq,"-")==0) {
779 if(debug)
780 debuga(_("Reading access log file: from stdin\n"));
781 fp_in=stdin;
782 from_stdin=true;
783 } else {
784 if (date[0]!='\0') {
785 if (stat(arq,&logstat)!=0) {
786 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
787 } else {
788 struct tm *logtime=localtime(&logstat.st_mtime);
789 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
790 debuga(_("Ignoring old log file %s\n"),arq);
791 continue;
792 }
793 }
794 }
795 fp_in=decomp(arq,&from_pipe);
796 if(fp_in==NULL) {
797 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
798 exit(EXIT_FAILURE);
799 }
800 if(debug) debuga(_("Reading access log file: %s\n"),arq);
801 from_stdin=false;
802 }
803 ilf=ILF_Unknown;
804 download_flag=false;
805 // pre-read the file only if we have to show stats
806 if(ShowReadStatistics && !from_stdin && !from_pipe) {
807 size_t nread,i;
808 bool skipcr=false;
809 char tmp4[MAXLEN];
810
811 recs1=0UL;
812 recs2=0UL;
813
814 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
815 for (i=0 ; i<nread ; i++)
816 if (skipcr) {
817 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
818 skipcr=false;
819 }
820 } else {
821 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
822 skipcr=true;
823 recs1++;
824 }
825 }
826 }
827 rewind(fp_in);
828 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
829 putchar('\r');
830 fflush( stdout ) ;
831 }
832
833 longline_reset(line);
834
835 while ((linebuf=longline_read(fp_in,line))!=NULL) {
836 blen=strlen(linebuf);
837
838 if (ilf==ILF_Unknown) {
839 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
840 fixendofline(linebuf);
841 if (debug)
842 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
843 ilf=ILF_Isa;
844 ilf_count[ilf]++;
845 continue;
846 }
847
848 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
849 if (getperiod_fromsarglog(arqtt,&period)<0) {
850 debuga(_("The name of the file is invalid: %s\n"),arq);
851 exit(EXIT_FAILURE);
852 }
853 ilf=ILF_Sarg;
854 ilf_count[ilf]++;
855 continue;
856 }
857 }
858
859 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
860 if(access(ParsedOutputLog,R_OK) != 0) {
861 my_mkdir(ParsedOutputLog);
862 }
863 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
864 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
865 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
866 exit(EXIT_FAILURE);
867 }
868 fputs("*** SARG Log ***\n",fp_log);
869 }
870
871 recs2++;
872 if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) {
873 double perc = recs2 * 100. / recs1 ;
874 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
875 putchar('\r');
876 fflush (stdout);
877 OutputNonZero = REPORT_EVERY_X_LINES ;
878 }
879 if(blen < 58) continue;
880 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
881 if(strstr(linebuf,"logfile turned over") != 0) continue;
882 if(linebuf[0] == ' ') continue;
883
884 // exclude_string
885 if(ExcludeString[0] != '\0') {
886 bool exstring=false;
887 getword_start(&gwarea,ExcludeString);
888 while(strchr(gwarea.current,':') != 0) {
889 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
890 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
891 exit(EXIT_FAILURE);
892 }
893 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
894 exstring=true;
895 break;
896 }
897 }
898 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
899 exstring=true;
900 if(exstring) continue;
901 }
902
903 totregsl++;
904 if(debugm)
905 printf("BUF=%s\n",linebuf);
906
907 t=NULL;
908 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
909 getword_start(&gwarea,linebuf);
910 if (getword(data,sizeof(data),&gwarea,' ')<0) {
911 debuga(_("Maybe you have a broken time in your access.log file\n"));
912 exit(EXIT_FAILURE);
913 }
914 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
915 strcpy(ip,data);
916 strcpy(elap,"0");
917 if(squid24) {
918 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
919 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
920 exit(EXIT_FAILURE);
921 }
922 } else {
923 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
924 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
925 exit(EXIT_FAILURE);
926 }
927 }
928 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
929 getword(fun,sizeof(fun),&gwarea,' ')<0) {
930 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
931 exit(EXIT_FAILURE);
932 }
933 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
934 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
935 exit(EXIT_FAILURE);
936 }
937 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
938 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
939 exit(EXIT_FAILURE);
940 }
941 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
942 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
943 exit(EXIT_FAILURE);
944 }
945 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
946 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
947 exit(EXIT_FAILURE);
948 }
949 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
950 if (getword(code,sizeof(code),&gwarea,' ')<0) {
951 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
952 exit(EXIT_FAILURE);
953 }
954 } else {
955 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
956 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
957 exit(EXIT_FAILURE);
958 }
959 }
960
961 if ((str = strchr(code, ':')) != NULL)
962 *str = '/';
963
964 if(strcmp(tam,"\0") == 0)
965 strcpy(tam,"0");
966
967 ilf=ILF_Common;
968 ilf_count[ilf]++;
969
970 getword_start(&gwarea,data+1);
971 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
972 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
973 exit(EXIT_FAILURE);
974 }
975 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
976 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
977 exit(EXIT_FAILURE);
978 }
979 getword_start(&gwarea,data);
980 if (getword_atoll(&iday,&gwarea,'/')<0){
981 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
982 exit(EXIT_FAILURE);
983 }
984 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
985 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
986 exit(EXIT_FAILURE);
987 }
988 if (getword_atoll(&iyear,&gwarea,'/')<0){
989 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
990 exit(EXIT_FAILURE);
991 }
992
993 imonth=month2num(mes)+1;
994 idata=builddia(iday,imonth,iyear);
995 computedate(iyear,imonth,iday,&tt);
996 t=&tt;
997 }
998
999 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
1000 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
1001 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
1002 exit(EXIT_FAILURE);
1003 }
1004 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
1005 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
1006 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
1007 exit(EXIT_FAILURE);
1008 }
1009 if(strlen(elap) < 1) continue;
1010 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
1011 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
1012 exit(EXIT_FAILURE);
1013 }
1014 if (getword(code,sizeof(code),&gwarea,' ')<0){
1015 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
1016 exit(EXIT_FAILURE);
1017 }
1018 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
1019 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
1020 exit(EXIT_FAILURE);
1021 }
1022 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
1023 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
1024 exit(EXIT_FAILURE);
1025 }
1026 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
1027 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
1028 exit(EXIT_FAILURE);
1029 }
1030 if (getword(user,sizeof(user),&gwarea,' ')<0){
1031 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1032 exit(EXIT_FAILURE);
1033 }
1034 ilf=ILF_Squid;
1035 ilf_count[ilf]++;
1036
1037 tnum=atoi(data);
1038 t=localtime(&tnum);
1039 if (t == NULL) {
1040 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1041 exit(EXIT_FAILURE);
1042 }
1043
1044 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1045
1046 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1047 }
1048 }
1049 if (ilf==ILF_Sarg) {
1050 getword_start(&gwarea,linebuf);
1051 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1052 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1053 exit(EXIT_FAILURE);
1054 }
1055 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1056 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1057 exit(EXIT_FAILURE);
1058 }
1059 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1060 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1061 exit(EXIT_FAILURE);
1062 }
1063 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1064 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1065 exit(EXIT_FAILURE);
1066 }
1067 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1068 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1069 exit(EXIT_FAILURE);
1070 }
1071 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1072 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1073 exit(EXIT_FAILURE);
1074 }
1075 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1076 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1077 exit(EXIT_FAILURE);
1078 }
1079 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1080 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1081 exit(EXIT_FAILURE);
1082 }
1083 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1084 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1085 exit(EXIT_FAILURE);
1086 }
1087 getword_start(&gwarea,data);
1088 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
1089 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1090 exit(EXIT_FAILURE);
1091 }
1092 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
1093 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1094 exit(EXIT_FAILURE);
1095 }
1096 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1097 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1098 exit(EXIT_FAILURE);
1099 }
1100 idata=builddia(iday,imonth,iyear);
1101 computedate(iyear,imonth,iday,&tt);
1102 t=&tt;
1103 }
1104 if (ilf==ILF_Isa) {
1105 if (linebuf[0] == '#') {
1106 int ncols,cols[ISACOL_Last];
1107
1108 fixendofline(linebuf);
1109 getword_start(&gwarea,linebuf);
1110 // remove the #Fields: column at the beginning of the line
1111 if (getword_skip(1000,&gwarea,' ')<0){
1112 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1113 exit(EXIT_FAILURE);
1114 }
1115 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1116 ncols=0;
1117 while(gwarea.current[0] != '\0') {
1118 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1119 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1120 exit(EXIT_FAILURE);
1121 }
1122 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1123 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1124 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1125 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1126 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1127 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1128 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1129 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1130 ncols++;
1131 }
1132 if (cols[ISACOL_Ip]>=0) {
1133 isa_ncols=ncols;
1134 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1135 isa_cols[ncols]=cols[ncols];
1136 }
1137 continue;
1138 }
1139 if (!isa_ncols) continue;
1140 getword_start(&gwarea,linebuf);
1141 for (x=0 ; x<isa_ncols ; x++) {
1142 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1143 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1144 exit(EXIT_FAILURE);
1145 }
1146 if (x==isa_cols[ISACOL_Ip]) {
1147 if (strlen(str)>=sizeof(ip)) {
1148 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1149 exit(EXIT_FAILURE);
1150 }
1151 strcpy(ip,str);
1152 } else if (x==isa_cols[ISACOL_UserName]) {
1153 if (strlen(str)>=sizeof(user)) {
1154 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1155 exit(EXIT_FAILURE);
1156 }
1157 strcpy(user,str);
1158 } else if (x==isa_cols[ISACOL_Date]) {
1159 if (strlen(str)>=sizeof(data)) {
1160 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1161 exit(EXIT_FAILURE);
1162 }
1163 strcpy(data,str);
1164 } else if (x==isa_cols[ISACOL_Time]) {
1165 if (strlen(str)>=sizeof(hora)) {
1166 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1167 exit(EXIT_FAILURE);
1168 }
1169 strcpy(hora,str);
1170 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1171 if (strlen(str)>=sizeof(elap)) {
1172 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1173 exit(EXIT_FAILURE);
1174 }
1175 strcpy(elap,str);
1176 } else if (x==isa_cols[ISACOL_Bytes]) {
1177 if (strlen(str)>=sizeof(tam)) {
1178 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1179 exit(EXIT_FAILURE);
1180 }
1181 strcpy(tam,str);
1182 } else if (x==isa_cols[ISACOL_Uri]) {
1183 url=str;
1184 } else if (x==isa_cols[ISACOL_Status]) {
1185 if (strlen(str)>=sizeof(code)) {
1186 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1187 exit(EXIT_FAILURE);
1188 }
1189 strcpy(code,str);
1190 }
1191 }
1192
1193 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1194 sprintf(val1,"DENIED/%s",code);
1195 strcpy(code,val1);
1196 }
1197 getword_start(&gwarea,data);
1198 if (getword_atoll(&iyear,&gwarea,'-')<0){
1199 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1200 exit(EXIT_FAILURE);
1201 }
1202 if (getword_atoll(&imonth,&gwarea,'-')<0){
1203 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1204 exit(EXIT_FAILURE);
1205 }
1206 if (getword_atoll(&iday,&gwarea,'\0')<0){
1207 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1208 exit(EXIT_FAILURE);
1209 }
1210
1211 idata=builddia(iday,imonth,iyear);
1212 computedate(iyear,imonth,iday,&tt);
1213 t=&tt;
1214 }
1215 if (t==NULL) {
1216 debuga(_("Unknown input log file format\n"));
1217 break;
1218 }
1219
1220 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1221 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1222
1223 if(debugm)
1224 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1225
1226 if(date[0] != '\0'){
1227 if(idata < dfrom || idata > duntil) continue;
1228 }
1229
1230 // Record only hours usage which is required
1231 if (t) {
1232 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1233 sizeof( int ), compar ) == NULL )
1234 continue;
1235
1236 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1237 sizeof( int ), compar ) == NULL )
1238 continue;
1239 }
1240
1241
1242 if(strlen(user) > MAX_USER_LEN) {
1243 if (debugm) printf(_("User ID too long: %s\n"),user);
1244 totregsx++;
1245 continue;
1246 }
1247
1248 // include_users
1249 if(IncludeUsers[0] != '\0') {
1250 sprintf(val1,":%s:",user);
1251 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1252 continue;
1253 }
1254
1255 if(vercode(code)) {
1256 if (debugm) printf(_("Excluded code: %s\n"),code);
1257 totregsx++;
1258 continue;
1259 }
1260
1261 if(testvaliduserchar(user))
1262 continue;
1263
1264 #if 0
1265 if((str = strstr(user,"%20")) != NULL) {
1266 /*
1267 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1268 the side effect is to truncate the name at the first space and merge the reports
1269 of people whose name is identical up to the first space.
1270
1271 The old code used to truncate the user name at the first % if a %20 was
1272 found anywhere in the string. That means the string could be truncated
1273 at the wrong place if another % occured before the %20. This new code should
1274 avoid that problem and only truncate at the space. There is no bug
1275 report indicating that anybody noticed this.
1276 */
1277 *str='\0';
1278 }
1279
1280 /*
1281 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1282 found in the user name.
1283 */
1284 while((str = strstr(user,"%5c")) != NULL) {
1285 *str='.';
1286 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1287 }
1288 #endif
1289
1290 urly=url;
1291
1292 if(ilf!=ILF_Sarg) {
1293 /*
1294 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1295 a downloaded file.
1296 */
1297 download_flag=is_download_suffix(url);
1298 if (download_flag) {
1299 download_url=url;
1300 download_count++;
1301 }
1302 } else
1303 download_flag=false;
1304
1305 // remove any protocol:// at the beginning of the URL
1306 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1307 int i;
1308
1309 str+=2;
1310 for (i=0 ; str[i] ; i++)
1311 url[i]=str[i];
1312 url[i]='\0';
1313 }
1314
1315 if(!LongUrl) {
1316 url_hostname(url,hostname,sizeof(hostname));
1317 url=hostname;
1318 }
1319
1320 if(url[0] == '\0') continue;
1321
1322 if(addr[0] != '\0'){
1323 if(strcmp(addr,ip)!=0) continue;
1324 }
1325 if(fhost) {
1326 if(!vhexclude(url)) {
1327 if (debugm) printf(_("Excluded site: %s\n"),url);
1328 totregsx++;
1329 continue;
1330 }
1331 }
1332
1333 if(hm[0] != '\0') {
1334 hmr[0]='\0';
1335 chm++;
1336 getword_start(&gwarea,hora);
1337 while(chm) {
1338 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1339 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1340 exit(EXIT_FAILURE);
1341 }
1342 strncat(hmr,warea,2);
1343 chm--;
1344 }
1345 strncat(hmr,gwarea.current,2);
1346
1347 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1348 }
1349
1350 if(site[0] != '\0'){
1351 if(strstr(url,site)==0) continue;
1352 }
1353
1354 if(UserIp) {
1355 strcpy(user,ip);
1356 id_is_ip=true;
1357 } else {
1358 id_is_ip=false;
1359 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1360 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1361 strcpy(user,ip);
1362 id_is_ip=true;
1363 }
1364 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1365 continue;
1366 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1367 strcpy(user,"everybody");
1368 } else {
1369 strlow(user);
1370 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1371 if((str = strchr(user,'_')) != 0) {
1372 strcpy(warea,str+1);
1373 strcpy(user,warea);
1374 }
1375 if((str = strchr(user,'+')) != 0) {
1376 strcpy(warea,str+1);
1377 strcpy(user,warea);
1378 }
1379 }
1380 }
1381 }
1382
1383 if(us[0] != '\0'){
1384 if(strcmp(user,us)!=0) continue;
1385 }
1386
1387 if(puser) {
1388 sprintf(wuser,":%s:",user);
1389 if(strstr(userfile, wuser) == 0)
1390 continue;
1391 }
1392
1393 if(fuser) {
1394 if(!vuexclude(user)) {
1395 if (debugm) printf(_("Excluded user: %s\n"),user);
1396 totregsx++;
1397 continue;
1398 }
1399 }
1400
1401 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1402 continue;
1403
1404 if(max_elapsed) {
1405 if(atol(elap)>max_elapsed) {
1406 elap[0]='0';
1407 elap[1]='\0';
1408 }
1409 }
1410
1411 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1412 fixendofline(str);
1413 sprintf(smartfilter,"\"%s\"",str+1);
1414 } else sprintf(smartfilter,"\"\"");
1415
1416 nopen=0;
1417 prev_ufile=NULL;
1418 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1419 prev_ufile=ufile;
1420 if (ufile->file) nopen++;
1421 }
1422 if (!ufile) {
1423 ufile=malloc(sizeof(*ufile));
1424 if (!ufile) {
1425 debuga(_("Not enough memory to store the user %s\n"),user);
1426 exit(EXIT_FAILURE);
1427 }
1428 memset(ufile,0,sizeof(*ufile));
1429 ufile->next=first_user_file;
1430 first_user_file=ufile;
1431 uinfo=userinfo_create(user);
1432 ufile->user=uinfo;
1433 uinfo->id_is_ip=id_is_ip;
1434 } else {
1435 if (prev_ufile) {
1436 prev_ufile->next=ufile->next;
1437 ufile->next=first_user_file;
1438 first_user_file=ufile;
1439 }
1440 }
1441
1442 if (ufile->file==NULL) {
1443 if (nopen>=maxopenfiles) {
1444 x=0;
1445 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1446 if (ufile1->file!=NULL) {
1447 if (x>=maxopenfiles) {
1448 if (fclose(ufile1->file)==EOF) {
1449 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1450 exit(EXIT_FAILURE);
1451 }
1452 ufile1->file=NULL;
1453 }
1454 x++;
1455 }
1456 }
1457 }
1458 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1459 debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
1460 exit(EXIT_FAILURE);
1461 }
1462 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1463 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1464 exit (1);
1465 }
1466 }
1467
1468 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1469 if ( fp_Write_User )
1470 fclose( fp_Write_User ) ;
1471 sprintf (tmp3, "%s/%s.unsort", tmp, user);
1472
1473 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1474 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1475 exit (1);
1476 }
1477 strcpy( sz_Last_User , user ) ;
1478 }*/
1479 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1480 debuga(_("Write error in the log file of user %s\n"),user);
1481 exit(EXIT_FAILURE);
1482 }
1483
1484 if(fp_log && ilf!=ILF_Sarg)
1485 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1486
1487 totregsg++;
1488
1489 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1490 ndownload = 1;
1491
1492 if ( ! fp_Download_Unsort ) {
1493 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1494 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
1495 exit (1);
1496 }
1497 }
1498 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1499 }
1500
1501 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1502 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1503 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1504 denied_count++;
1505 }
1506 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1507 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1508 authfail_count++;
1509 }
1510 }
1511
1512 if (ilf!=ILF_Sarg) {
1513 if(!totper || idata<mindate){
1514 mindate=idata;
1515 memcpy(&period.start,t,sizeof(*t));
1516 strcpy(start_hour,tbuf2);
1517 }
1518 if (!totper || idata>maxdate) {
1519 maxdate=idata;
1520 memcpy(&period.end,t,sizeof(*t));
1521 }
1522 totper=true;
1523 }
1524
1525 if(debugm){
1526 printf("IP=\t%s\n",ip);
1527 printf("USER=\t%s\n",user);
1528 printf("ELAP=\t%s\n",elap);
1529 printf("DATE=\t%s\n",dia);
1530 printf("TIME=\t%s\n",hora);
1531 printf("FUNC=\t%s\n",fun);
1532 printf("URL=\t%s\n",url);
1533 printf("CODE=\t%s\n",code);
1534 printf("LEN=\t%s\n",tam);
1535 }
1536 }
1537 if (!from_stdin) {
1538 if (from_pipe)
1539 pclose(fp_in);
1540 else {
1541 fclose(fp_in);
1542 if( ShowReadStatistics )
1543 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1544 }
1545 }
1546 }
1547
1548 if (debug)
1549 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1550
1551 longline_destroy(&line);
1552 if ( fp_Download_Unsort )
1553 fclose (fp_Download_Unsort);
1554
1555 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1556 ufile1=ufile->next;
1557 if (ufile->file!=NULL) fclose(ufile->file);
1558 free(ufile);
1559 }
1560
1561 free_download();
1562 free_excludecodes();
1563 free_exclude();
1564
1565 if(debug) {
1566 int totalcount=0;
1567
1568 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1569
1570 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1571 debuga(_("Log with mixed records format (squid and common log)\n"));
1572
1573 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1574 debuga(_("Common log format\n"));
1575
1576 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1577 debuga(_("Squid log format\n"));
1578
1579 if(ilf_count[ILF_Sarg]>0)
1580 debuga(_("Sarg log format\n"));
1581
1582 if(totalcount==0 && totregsg)
1583 debuga(_("Log with invalid format\n"));
1584 }
1585
1586 if(!totregsg){
1587 debuga(_("No records found\n"));
1588 debuga(_("End\n"));
1589 if(fp_denied) fclose(fp_denied);
1590 if(fp_authfail) fclose(fp_authfail);
1591 userinfo_free();
1592 if(userfile) free(userfile);
1593 close_usertab();
1594 exit(EXIT_SUCCESS);
1595 }
1596
1597 if (date[0]!='\0') {
1598 char date0[30], date1[30];
1599
1600 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1601 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1602 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1603 getperiod_fromrange(&period,dfrom,duntil);
1604 }
1605 if (getperiod_buildtext(&period)<0) {
1606 debuga(_("Failed to build the string representation of the date range\n"));
1607 exit(EXIT_FAILURE);
1608 }
1609
1610 if(debugz){
1611 debugaz("data",dia);
1612 debugaz("period",period.text);
1613 }
1614
1615 if(debug)
1616 debuga(_("Period: %s\n"),period.text);
1617
1618 // fclose(fp_ou);
1619 if(fp_denied)
1620 fclose(fp_denied);
1621 if(fp_authfail)
1622 fclose(fp_authfail);
1623
1624 if(fp_log != NULL) {
1625 fclose(fp_log);
1626 strcpy(end_hour,tbuf2);
1627 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1628 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1629 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1630 if (rename(arq_log,val4)) {
1631 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1632 } else {
1633 strcpy(arq_log,val4);
1634
1635 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1636 /*
1637 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1638 necessary around the command name, put them in the configuration file.
1639 */
1640 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1641 cstatus=system(val1);
1642 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1643 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1644 debuga(_("command: %s\n"),val1);
1645 exit(EXIT_FAILURE);
1646 }
1647 }
1648 }
1649 if(debug)
1650 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1651 }
1652
1653 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1654 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1655 cstatus=system(csort);
1656 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1657 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1658 debuga(_("sort command: %s\n"),csort);
1659 exit(EXIT_FAILURE);
1660 }
1661 unlink(denied_unsort);
1662 }
1663
1664 sort_users_log(tmp, debug);
1665
1666 if(DataFile[0] != '\0')
1667 data_file(tmp);
1668 else
1669 gerarel();
1670
1671 if((ReportType & REPORT_TYPE_DENIED) != 0)
1672 unlink(denied_sort);
1673
1674 if(strcmp(tmp,"/tmp") != 0) {
1675 unlinkdir(tmp,0);
1676 }
1677
1678 userinfo_free();
1679 if(userfile)
1680 free(userfile);
1681 close_usertab();
1682
1683 if(debug)
1684 debuga(_("End\n"));
1685
1686 exit(EXIT_SUCCESS);
1687
1688 }
1689
1690
1691 static void getusers(const char *pwdfile, int debug)
1692 {
1693
1694 FILE *fp_usr;
1695 char buf[255];
1696 char *str;
1697 long int nreg=0;
1698
1699 if(debug)
1700 debuga(_("Loading password file from %s\n"),pwdfile);
1701
1702 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1703 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1704 exit(EXIT_FAILURE);
1705 }
1706
1707 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1708 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1709 exit(EXIT_FAILURE);
1710 }
1711 nreg = ftell(fp_usr);
1712 if (nreg<0) {
1713 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1714 exit(EXIT_FAILURE);
1715 }
1716 nreg = nreg+5000;
1717 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1718 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1719 exit(EXIT_FAILURE);
1720 }
1721
1722 if((userfile=(char *) malloc(nreg))==NULL){
1723 debuga(_("malloc error (%ld)\n"),nreg);
1724 exit(EXIT_FAILURE);
1725 }
1726
1727 bzero(userfile,nreg);
1728 strcpy(userfile,":");
1729
1730 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1731 str=strchr(buf,':');
1732 if (!str) {
1733 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1734 exit(EXIT_FAILURE);
1735 }
1736 str[1]='\0';
1737 strcat(userfile,buf);
1738 }
1739
1740 fclose(fp_usr);
1741
1742 return;
1743 }