git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Be more consistent with the use of the temporary directory (i.e. always use /tmp...
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef HAVE_GETOPT_H
31 #include <getopt.h>
32 #endif
33
34 #define REPORT_EVERY_X_LINES 5000 // number of log lines read between two refreshes of the "reading: x%" progress message printed by main()
35 #define MAX_OPEN_USER_FILES 10 // initial value main() gives to maxopenfiles; NOTE(review): presumably the cap on simultaneously open per-user files - the code using it is not visible here
36
37 struct userfilestruct // node of a singly linked list (chained through next) tying a user record to a stdio stream
38 {
39 struct userfilestruct *next; // following node of the list
40 struct userinfostruct *user; // user this entry refers to; struct userinfostruct is declared outside this file
41 FILE *file; // stream associated with that user; NOTE(review): presumably a per-user temporary file - confirm against the code that opens it
42 };
43
44 /*@null@*/static char *userfile=NULL; // starts out NULL; NOTE(review): the code that fills it is not visible here (presumably getusers()) - confirm
45
46 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; // default: the seven values 0..6 followed by 7; assumes numlist is {values, count} - TODO confirm in the project headers
47 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
48 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; // default: the 24 values 0..23 followed by 24 (same assumed layout as weekdays)
49
50 static void getusers(const char *pwdfile, int debug); // forward declaration; main() calls it with PasswdFile and debug when that file is readable
51
52 int main(int argc,char *argv[])
53 {
54 enum isa_col_id {
55 ISACOL_Ip,
56 ISACOL_UserName,
57 ISACOL_Date,
58 ISACOL_Time,
59 ISACOL_TimeTaken,
60 ISACOL_Bytes,
61 ISACOL_Uri,
62 ISACOL_Status,
63 ISACOL_Last //last entry of the list !
64 };
65 enum InputLogFormat {
66 ILF_Unknown,
67 ILF_Squid,
68 ILF_Common,
69 ILF_Sarg,
70 ILF_Isa,
71 ILF_Last //last entry of the list !
72 };
73
74 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
75
76 char sz_Download_Unsort[ 20000 ] ;
77 FILE * fp_Download_Unsort = NULL ;
78
79 extern int optind;
80 extern int optopt;
81 extern char *optarg;
82
83 char data[255];
84 char elap[255];
85 char ip[MAXLEN];
86 char tam[255];
87 char fun[MAXLEN];
88 char wuser[MAXLEN];
89 char smartfilter[MAXLEN];
90 char dia[128];
91 char mes[30];
92 char ano[30];
93 char hora[30];
94 char wtemp[MAXLEN];
95 char date[255];
96 char arq[255];
97 char arq_log[255];
98 char hm[15], hmf[15], hmr[15];
99 int chm=0;
100 char uagent[MAXLEN];
101 char hexclude[MAXLEN];
102 char csort[MAXLEN];
103 int cstatus;
104 char tbuf2[128];
105 char zip[20];
106 char *str;
107 char tmp3[MAXLEN];
108 char denied_unsort[MAXLEN];
109 char denied_sort[MAXLEN];
110 char authfail_unsort[MAXLEN];
111 char start_hour[128];
112 char end_hour[128];
113 char *linebuf;
114 char hostname[512];
115 char *url;
116 char *urly;
117 char user[MAX_USER_LEN];
118 enum InputLogFormat ilf;
119 int ilf_count[ILF_Last];
120 int ch;
121 int x;
122 int errflg=0;
123 int puser=0;
124 bool fhost=false;
125 bool dns=false;
126 bool fuser=false;
127 int idata=0;
128 int mindate=0;
129 int maxdate=0;
130 int iarq=0;
131 int isa_ncols=0,isa_cols[ISACOL_Last];
132 bool from_stdin;
133 bool from_pipe;
134 int blen;
135 int maxopenfiles;
136 int nopen;
137 bool id_is_ip;
138 long totregsl=0;
139 long totregsg=0;
140 long totregsx=0;
141 bool totper=false;
142 long int max_elapsed=0;
143 long long int iyear, imonth, iday;
144 bool realt;
145 bool userip;
146 struct tm tt;
147 struct tm *t;
148 unsigned long recs1=0UL;
149 unsigned long recs2=0UL;
150 int OutputNonZero = REPORT_EVERY_X_LINES ;
151 bool download_flag=false;
152 char *download_url=NULL;
153 struct getwordstruct gwarea;
154 longline line;
155 time_t tnum;
156 struct stat logstat;
157 struct userinfostruct *uinfo;
158 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
159 static int split=0;
160 static int convert=0;
161 static int output_css=0;
162 int option_index;
163 static struct option long_options[]=
164 {
165 {"convert",no_argument,&convert,1},
166 {"css",no_argument,&output_css,1},
167 {"split",no_argument,&split,1},
168 {0,0,0,0}
169 };
170
171 #ifdef HAVE_LOCALE_H
172 setlocale(LC_TIME,"");
173 #endif
174
175 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
176 if (!setlocale (LC_ALL, "")) {
177 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
178 exit(EXIT_FAILURE);
179 }
180 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
181 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
182 exit(EXIT_FAILURE);
183 }
184 if (!textdomain (PACKAGE_NAME)) {
185 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
186 exit(EXIT_FAILURE);
187 }
188 #endif //ENABLE_NLS
189
190 BgImage[0]='\0';
191 LogoImage[0]='\0';
192 LogoText[0]='\0';
193 PasswdFile[0]='\0';
194 OutputEmail[0]='\0';
195 UserAgentLog[0]='\0';
196 ExcludeHosts[0]='\0';
197 ExcludeUsers[0]='\0';
198 ConfigFile[0]='\0';
199 code[0]='\0';
200 LastLog=0;
201 ReportType=0UL;
202 UserTabFile[0]='\0';
203 BlockIt[0]='\0';
204 ExternalCSSFile[0]='\0';
205 RedirectorLogFormat[0]='\0';
206 NRedirectorLogs=0;
207 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
208
209 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
210 strcpy(GraphDaysBytesBarColor,"orange");
211 strcpy(BgColor,"#ffffff");
212 strcpy(TxColor,"#000000");
213 strcpy(TxBgColor,"lavender");
214 strcpy(TiColor,"darkblue");
215 strcpy(Width,"80");
216 strcpy(Height,"45");
217 strcpy(LogoTextColor,"#000000");
218 strcpy(HeaderColor,"darkblue");
219 strcpy(HeaderBgColor,"#dddddd");
220 strcpy(LogoTextColor,"#006699");
221 strcpy(FontSize,"9px");
222 strcpy(TempDir,"/tmp");
223 strcpy(OutputDir,"/var/www/html/squid-reports");
224 Ip2Name=false;
225 strcpy(DateFormat,"u");
226 OverwriteReport=false;
227 RemoveTempFiles=true;
228 strcpy(ReplaceIndex,"index.html");
229 Index=INDEX_YES;
230 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
231 UseComma=0;
232 strcpy(MailUtility,"mailx");
233 TopSitesNum=100;
234 TopUsersNum=0;
235 UserIp=0;
236 strcpy(TopuserSortField,"BYTES");
237 strcpy(UserSortField,"BYTES");
238 strcpy(TopuserSortOrder,"reverse");
239 strcpy(UserSortOrder,"reverse");
240 strcpy(TopsitesSortField,"CONNECT");
241 strcpy(TopsitesSortType,"D");
242 LongUrl=0;
243 strcpy(FontFace,"Verdana,Tahoma,Arial");
244 datetimeby=DATETIME_BYTE;
245 strcpy(CharSet,"ISO-8859-1");
246 Privacy=0;
247 strcpy(PrivacyString,"***.***.***.***");
248 strcpy(PrivacyStringColor,"blue");
249 SuccessfulMsg=true;
250 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
251 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
252 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
253 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
254 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
255 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
256 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
257 strcpy(DataFileDelimiter,";");
258 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
259 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
260 ShowReadStatistics=true;
261 strcpy(IndexSortOrder,"D");
262 ShowSargInfo=true;
263 ShowSargLogo=true;
264 strcpy(ParsedOutputLog,"no");
265 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
266 DisplayedValues=DISPLAY_ABBREV;
267 strcpy(HeaderFontSize,"9px");
268 strcpy(TitleFontSize,"11px");
269 strcpy(AuthUserTemplateFile,"sarg_htaccess");
270 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
271 Graphs=true;
272 #if defined(FONTDIR)
273 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
274 #else
275 GraphFont[0]='\0';
276 #endif
277 strcpy(Ulimit,"20000");
278 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
279 IndexTree=INDEX_TREE_FILE;
280 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
281 RealtimeUnauthRec=REALTIME_UNAUTH_REC_SHOW;
282 RedirectorFilterOutDate=true;
283 DansguardianFilterOutDate=true;
284 DataFileUrl=DATAFILEURL_IP;
285 strcpy(MaxElapsed,"28800000");
286 BytesInSitesUsersReport=0;
287 UserAuthentication=0;
288 strcpy(LDAPHost,"127.0.0.1");
289 LDAPPort=389;
290 LDAPProtocolVersion=3;
291 LDAPBindDN[0]='\0';
292 LDAPBindPW[0]='\0';
293 LDAPBaseSearch[0]='\0';
294 strcpy(LDAPFilterSearch, "uid=%s");
295 strcpy(LDAPTargetAttr, "cn");
296
297 dia[0]='\0';
298 mes[0]='\0';
299 ano[0]='\0';
300 hora[0]='\0';
301 tmp[0]='\0';
302 tmp3[0]='\0';
303 wtemp[0]='\0';
304 us[0]='\0';
305 date[0]='\0';
306 df[0]='\0';
307 uagent[0]='\0';
308 hexclude[0]='\0';
309 addr[0]='\0';
310 hm[0]='\0';
311 hmf[0]='\0';
312 site[0]='\0';
313 outdir[0]='\0';
314 elap[0]='\0';
315 email[0]='\0';
316 zip[0]='\0';
317 UserInvalidChar[0]='\0';
318 DataFile[0]='\0';
319 SquidGuardConf[0]='\0';
320 DansGuardianConf[0]='\0';
321 start_hour[0]='\0';
322 end_hour[0]='\0';
323
324 denied_count=0;
325 download_count=0;
326 authfail_count=0;
327 dansguardian_count=0;
328 squidguard_count=0;
329 useragent_count=0;
330 DeniedReportLimit=10;
331 AuthfailReportLimit=10;
332 DansGuardianReportLimit=10;
333 SquidGuardReportLimit=10;
334 DownloadReportLimit=50;
335 UserReportLimit=0;
336 debug=0;
337 debugz=0;
338 debugm=0;
339 iprel=false;
340 userip=false;
341 realt=false;
342 realtime_refresh=3;
343 realtime_access_log_lines=1000;
344 cost=0.01;
345 nocost=50000000;
346 ndownload=0;
347 squid24=false;
348 dfrom=0;
349 duntil=0;
350
351 bzero(IncludeUsers, sizeof(IncludeUsers));
352 bzero(ExcludeString, sizeof(ExcludeString));
353 first_user_file=NULL;
354 memset(&period,0,sizeof(period));
355
356 NAccessLog=0;
357 for(x=0; x<MAXLOGS; x++)
358 AccessLog[x][0]='\0';
359 AccessLogFromCmdLine=0;
360 RedirectorLogFromCmdLine=0;
361
362 strcpy(Title,_("Squid User Access Report"));
363
364 while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
365 switch(ch)
366 {
367 case 0:
368 break;
369 case 'a':
370 strcpy(addr,optarg);
371 break;
372 case 'b':
373 strcpy(uagent,optarg);
374 break;
375 case 'c':
376 strcpy(hexclude,optarg);
377 break;
378 case 'd':
379 strncpy(date,optarg,sizeof(date)-1);
380 date[sizeof(date)-1]='\0';
381 date_from(date, &dfrom, &duntil);
382 break;
383 case 'e':
384 strcpy(email,optarg);
385 break;
386 case 'f':
387 strcpy(ConfigFile,optarg);
388 break;
389 case 'g':
390 strcpy(df,optarg);
391 break;
392 case 'h':
393 usage(argv[0]);
394 exit(EXIT_SUCCESS);
395 case 'i':
396 iprel=true;
397 break;
398 case 'l':
399 if (NAccessLog>=MAXLOGS) {
400 debuga(_("Too many log files passed on command line with option -l.\n"));
401 exit(EXIT_FAILURE);
402 }
403 if (strlen(optarg)>=MAX_LOG_FILELEN) {
404 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
405 exit(EXIT_FAILURE);
406 }
407 strcpy(AccessLog[NAccessLog],optarg);
408 NAccessLog++;
409 AccessLogFromCmdLine++;
410 break;
411 case 'L':
412 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
413 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
414 exit(EXIT_FAILURE);
415 }
416 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
417 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
418 exit(EXIT_FAILURE);
419 }
420 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
421 NRedirectorLogs++;
422 RedirectorLogFromCmdLine++;
423 break;
424 case 'm':
425 debugm++;
426 break;
427 case 'n':
428 dns=true;
429 break;
430 case 'o':
431 strcpy(outdir,optarg);
432 break;
433 case 'p':
434 userip=true;
435 break;
436 case 'r':
437 realt=true;
438 break;
439 case 's':
440 strcpy(site,optarg);
441 break;
442 case 't':
443 {
444 int h,m;
445
446 if(strstr(optarg,"-") == 0) {
447 strcpy(hm,optarg);
448 strcpy(hmf,optarg);
449 } else {
450 getword_start(&gwarea,optarg);
451 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
452 debuga(_("The time range passed on the command line with option -t is invalid\n"));
453 exit(EXIT_FAILURE);
454 }
455 }
456 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
457 debuga(_("Time period must be MM or MM:SS. Exit\n"));
458 exit(EXIT_FAILURE);
459 }
460 sprintf(hm,"%02d%02d",h,m);
461 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
462 debuga(_("Time period must be MM or MM:SS. Exit\n"));
463 exit(EXIT_FAILURE);
464 }
465 sprintf(hmf,"%02d%02d",h,m);
466 break;
467 }
468 case 'u':
469 strcpy(us,optarg);
470 break;
471 case 'v':
472 version();
473 break;
474 case 'w':
475 strcpy(tmp,optarg);
476 break;
477 case 'x':
478 debug++;
479 break;
480 case 'y':
481 langcode++;
482 break;
483 case 'z':
484 debugz++;
485 break;
486 /*case ':':
487 debuga(_("Option -%c require an argument\n"),optopt);
488 errflg++;
489 break;*/
490 case '?':
491 usage(argv[0]);
492 exit(EXIT_FAILURE);
493 default:
494 abort();
495 }
496 }
497
498 if (errflg>0) {
499 usage(argv[0]);
500 exit(2);
501 }
502
503 if (optind<argc) {
504 for (iarq=optind ; iarq<argc ; iarq++) {
505 if (NAccessLog>=MAXLOGS) {
506 debuga(_("Too many log files passed on command line.\n"));
507 exit(EXIT_FAILURE);
508 }
509 if (strlen(argv[iarq])>=MAX_LOG_FILELEN) {
510 debuga(_("Log file name too long passed on command line: %s\n"),argv[iarq]);
511 exit(EXIT_FAILURE);
512 }
513 strcpy(AccessLog[NAccessLog],argv[iarq]);
514 NAccessLog++;
515 AccessLogFromCmdLine++;
516 }
517 }
518
519 if(debug) debuga(_("Init\n"));
520
521 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
522 if(access(ConfigFile, R_OK) != 0) {
523 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
524 exit(EXIT_FAILURE);
525 }
526
527 if(access(ConfigFile, R_OK) == 0)
528 getconf();
529
530 if(userip) UserIp=true;
531
532 if(dns) Ip2Name=true;
533
534 if(realt) {
535 realtime();
536 exit(EXIT_SUCCESS);
537 }
538
539 if(IndexTree == INDEX_TREE_FILE)
540 strcpy(ImageFile,"../images");
541 else
542 strcpy(ImageFile,"../../../images");
543
544 dataonly=0;
545 if(DataFile[0] != '\0')
546 dataonly++;
547
548 if(NAccessLog == 0) {
549 strcpy(AccessLog[0],"/var/log/squid/access.log");
550 NAccessLog++;
551 }
552
553 if(output_css) {
554 css_content(stdout);
555 exit(EXIT_SUCCESS);
556 }
557 if(split) {
558 for (iarq=0 ; iarq<NAccessLog ; iarq++)
559 splitlog(AccessLog[iarq], df, dfrom, duntil, convert);
560 exit(EXIT_SUCCESS);
561 }
562 if(convert) {
563 for (iarq=0 ; iarq<NAccessLog ; iarq++)
564 convlog(AccessLog[iarq], df, dfrom, duntil);
565 exit(EXIT_SUCCESS);
566 }
567
568 load_excludecodes(ExcludeCodes);
569
570 if(access(PasswdFile, R_OK) == 0) {
571 getusers(PasswdFile,debug);
572 puser++;
573 }
574
575 if(hexclude[0] == '\0')
576 strcpy(hexclude,ExcludeHosts);
577 if(hexclude[0] != '\0') {
578 gethexclude(hexclude,debug);
579 fhost=true;
580 }
581
582 if(ReportType == 0) {
583 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
584 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
585 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
586 }
587
588 if(access(ExcludeUsers, R_OK) == 0) {
589 getuexclude(ExcludeUsers,debug);
590 fuser=true;
591 }
592
593 indexonly=0;
594 if(fuser) {
595 if(is_indexonly())
596 indexonly++;
597 }
598 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
599 if(Index == INDEX_ONLY) indexonly++;
600
601 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
602
603 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
604 strcat(outdir,"/");
605
606 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
607
608 if(tmp[0] == '\0') strcpy(tmp,TempDir);
609 else strcpy(TempDir,tmp);
610 /*
611 For historical reasons, the temporary directory is the subdirectory "sarg" of the path
612 provided by the user.
613 */
614 strcat(tmp,"/sarg");
615
616 if(df[0] == '\0') strcpy(df,DateFormat);
617 else strcpy(DateFormat,df);
618
619 if(df[0] == '\0') {
620 strcpy(df,"u");
621 strcpy(DateFormat,"u");
622 }
623 if (df[0]=='w')
624 IndexTree=INDEX_TREE_FILE;
625
626 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
627
628 if(email[0] != '\0') {
629 my_mkdir(tmp);
630 strcpy(outdir,tmp);
631 strcat(outdir,"/");
632 }
633
634 if(access(tmp, R_OK) == 0) {
635 unlinkdir(tmp,1);
636 }
637 my_mkdir(tmp);
638 snprintf(denied_unsort,sizeof(denied_unsort),"%s/denied.log.unsort",tmp);
639 snprintf(denied_sort,sizeof(denied_sort),"%s/denied.log",tmp);
640 snprintf(authfail_unsort,sizeof(authfail_unsort),"%s/authfail.log.unsort",tmp);
641
642 if(debug) {
643 debuga(_("Parameters:\n"));
644 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
645 debuga(_(" Useragent log (-b) = %s\n"),uagent);
646 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
647 debuga(_(" Date from-until (-d) = %s\n"),date);
648 debuga(_(" Email address to send reports (-e) = %s\n"),email);
649 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
650 if(strcmp(df,"e") == 0)
651 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
652 if(strcmp(df,"u") == 0)
653 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
654 if(strcmp(df,"w") == 0)
655 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
656 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
657 for (iarq=0 ; iarq<NAccessLog ; iarq++)
658 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
659 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
660 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
661 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
662 debuga(_(" Output dir (-o) = %s\n"),outdir);
663 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
664 debuga(_(" Accessed site (-s) = %s\n"),site);
665 debuga(_(" Time (-t) = %s\n"),hm);
666 debuga(_(" User (-u) = %s\n"),us);
667 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
668 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
669 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
670 debuga("\n");
671 }
672
673 if(debugm) {
674 printf(_("Parameters:\n"));
675 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
676 printf(_(" Useragent log (-b) = %s\n"),uagent);
677 printf(_(" Exclude file (-c) = %s\n"),hexclude);
678 printf(_(" Date from-until (-d) = %s\n"),date);
679 printf(_(" Email address to send reports (-e) = %s\n"),email);
680 printf(_(" Config file (-f) = %s\n"),ConfigFile);
681 if(strcmp(df,"e") == 0)
682 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
683 if(strcmp(df,"u") == 0)
684 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
685 if(strcmp(df,"w") == 0)
686 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
687 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
688 for (iarq=0 ; iarq<NAccessLog ; iarq++)
689 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
690 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
691 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
692 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
693 printf(_(" Output dir (-o) = %s\n"),outdir);
694 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
695 printf(_(" Accessed site (-s) = %s\n"),site);
696 printf(_(" Time (-t) = %s\n"),hm);
697 printf(_(" User (-u) = %s\n"),us);
698 printf(_(" Temporary dir (-w) = %s\n"),tmp);
699 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
700 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
701 printf(_("sarg version: %s\n"),VERSION);
702 }
703
704 if(debug)
705 debuga(_("sarg version: %s\n"),VERSION);
706
707 maxopenfiles=MAX_OPEN_USER_FILES;
708 #ifdef HAVE_RLIM_T
709 if (Ulimit[0] != '\0') {
710 struct rlimit rl;
711 long l1, l2;
712 int rc=0;
713
714 #if defined(RLIMIT_NOFILE)
715 getrlimit (RLIMIT_NOFILE, &rl);
716 #elif defined(RLIMIT_OFILE)
717 getrlimit (RLIMIT_OFILE, &rl);
718 #else
719 #warning "No rlimit resource for the number of open files"
720 #endif
721 l1 = rl.rlim_cur;
722 l2 = rl.rlim_max;
723
724 rl.rlim_cur = atol(Ulimit);
725 rl.rlim_max = atol(Ulimit);
726 #if defined(RLIMIT_NOFILE)
727 rc=setrlimit (RLIMIT_NOFILE, &rl);
728 #elif defined(RLIMIT_OFILE)
729 rc=setrlimit (RLIMIT_OFILE, &rl);
730 #else
731 #warning "No rlimit resource for the number of open files"
732 #endif
733 if(rc == -1) {
734 debuga(_("setrlimit error - %s\n"),strerror(errno));
735 }
736
737 if(debug)
738 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
739 }
740 #endif
741
742 init_usertab(UserTabFile);
743
744 if ((line=longline_create())==NULL) {
745 debuga(_("Not enough memory to read a log file\n"));
746 exit(EXIT_FAILURE);
747 }
748
749 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/download.unsort", tmp);
750
751 if(DataFile[0]=='\0') {
752 if((ReportType & REPORT_TYPE_DENIED) != 0) {
753 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
754 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
755 exit(EXIT_FAILURE);
756 }
757 }
758
759 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
760 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
761 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
762 exit(EXIT_FAILURE);
763 }
764 }
765 }
766
767 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
768 strcpy(arq,AccessLog[iarq]);
769
770 strcpy(arqtt,arq);
771
772 if(strcmp(arq,"-")==0) {
773 if(debug)
774 debuga(_("Reading access log file: from stdin\n"));
775 fp_in=stdin;
776 from_stdin=true;
777 } else {
778 if (date[0]!='\0') {
779 if (stat(arq,&logstat)!=0) {
780 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
781 } else {
782 struct tm *logtime=localtime(&logstat.st_mtime);
783 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
784 debuga(_("Ignoring old log file %s\n"),arq);
785 continue;
786 }
787 }
788 }
789 fp_in=decomp(arq,&from_pipe);
790 if(fp_in==NULL) {
791 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
792 exit(EXIT_FAILURE);
793 }
794 if(debug) debuga(_("Reading access log file: %s\n"),arq);
795 from_stdin=false;
796 }
797 ilf=ILF_Unknown;
798 download_flag=false;
799 // pre-read the file only if we have to show stats
800 if(ShowReadStatistics && !from_stdin && !from_pipe) {
801 size_t nread,i;
802 bool skipcr=false;
803 char tmp4[MAXLEN];
804
805 recs1=0UL;
806 recs2=0UL;
807
808 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
809 for (i=0 ; i<nread ; i++)
810 if (skipcr) {
811 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
812 skipcr=false;
813 }
814 } else {
815 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
816 skipcr=true;
817 recs1++;
818 }
819 }
820 }
821 rewind(fp_in);
822 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
823 putchar('\r');
824 fflush( stdout ) ;
825 }
826
827 longline_reset(line);
828
829 while ((linebuf=longline_read(fp_in,line))!=NULL) {
830 blen=strlen(linebuf);
831
832 if (ilf==ILF_Unknown) {
833 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
834 fixendofline(linebuf);
835 if (debug)
836 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
837 ilf=ILF_Isa;
838 ilf_count[ilf]++;
839 continue;
840 }
841
842 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
843 if (getperiod_fromsarglog(arqtt,&period)<0) {
844 debuga(_("The name of the file is invalid: %s\n"),arq);
845 exit(EXIT_FAILURE);
846 }
847 ilf=ILF_Sarg;
848 ilf_count[ilf]++;
849 continue;
850 }
851 }
852
853 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
854 if(access(ParsedOutputLog,R_OK) != 0) {
855 my_mkdir(ParsedOutputLog);
856 }
857 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
858 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
859 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
860 exit(EXIT_FAILURE);
861 }
862 fputs("*** SARG Log ***\n",fp_log);
863 }
864
865 recs2++;
866 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
867 double perc = recs2 * 100. / recs1 ;
868 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
869 putchar('\r');
870 fflush (stdout);
871 OutputNonZero = REPORT_EVERY_X_LINES ;
872 }
873 if(blen < 58) continue;
874 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
875 if(strstr(linebuf,"logfile turned over") != 0) continue;
876 if(linebuf[0] == ' ') continue;
877
878 // exclude_string
879 if(ExcludeString[0] != '\0') {
880 bool exstring=false;
881 getword_start(&gwarea,ExcludeString);
882 while(strchr(gwarea.current,':') != 0) {
883 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
884 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
885 exit(EXIT_FAILURE);
886 }
887 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
888 exstring=true;
889 break;
890 }
891 }
892 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
893 exstring=true;
894 if(exstring) continue;
895 }
896
897 totregsl++;
898 if(debugm)
899 printf("BUF=%s\n",linebuf);
900
901 t=NULL;
902 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
903 getword_start(&gwarea,linebuf);
904 if (getword(data,sizeof(data),&gwarea,' ')<0) {
905 debuga(_("Maybe you have a broken time in your access.log file\n"));
906 exit(EXIT_FAILURE);
907 }
908 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
909 strcpy(ip,data);
910 strcpy(elap,"0");
911 if(squid24) {
912 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
913 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
914 exit(EXIT_FAILURE);
915 }
916 } else {
917 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
918 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
919 exit(EXIT_FAILURE);
920 }
921 }
922 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
923 getword(fun,sizeof(fun),&gwarea,' ')<0) {
924 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
925 exit(EXIT_FAILURE);
926 }
927 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
928 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
929 exit(EXIT_FAILURE);
930 }
931 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
932 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
933 exit(EXIT_FAILURE);
934 }
935 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
936 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
937 exit(EXIT_FAILURE);
938 }
939 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
940 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
941 exit(EXIT_FAILURE);
942 }
943 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
944 if (getword(code,sizeof(code),&gwarea,' ')<0) {
945 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
946 exit(EXIT_FAILURE);
947 }
948 } else {
949 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
950 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
951 exit(EXIT_FAILURE);
952 }
953 }
954
955 if ((str = strchr(code, ':')) != NULL)
956 *str = '/';
957
958 if(strcmp(tam,"\0") == 0)
959 strcpy(tam,"0");
960
961 ilf=ILF_Common;
962 ilf_count[ilf]++;
963
964 getword_start(&gwarea,data+1);
965 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
966 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
967 exit(EXIT_FAILURE);
968 }
969 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
970 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
971 exit(EXIT_FAILURE);
972 }
973 getword_start(&gwarea,data);
974 if (getword_atoll(&iday,&gwarea,'/')<0){
975 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
976 exit(EXIT_FAILURE);
977 }
978 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
979 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
980 exit(EXIT_FAILURE);
981 }
982 if (getword_atoll(&iyear,&gwarea,'/')<0){
983 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
984 exit(EXIT_FAILURE);
985 }
986
987 imonth=month2num(mes)+1;
988 idata=builddia(iday,imonth,iyear);
989 computedate(iyear,imonth,iday,&tt);
990 t=&tt;
991 }
992
993 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
994 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
995 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
996 exit(EXIT_FAILURE);
997 }
998 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
999 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
1000 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
1001 exit(EXIT_FAILURE);
1002 }
1003 if(strlen(elap) < 1) continue;
1004 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
1005 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
1006 exit(EXIT_FAILURE);
1007 }
1008 if (getword(code,sizeof(code),&gwarea,' ')<0){
1009 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
1010 exit(EXIT_FAILURE);
1011 }
1012 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
1013 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
1014 exit(EXIT_FAILURE);
1015 }
1016 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
1017 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
1018 exit(EXIT_FAILURE);
1019 }
1020 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
1021 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
1022 exit(EXIT_FAILURE);
1023 }
1024 if (getword(user,sizeof(user),&gwarea,' ')<0){
1025 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1026 exit(EXIT_FAILURE);
1027 }
1028 ilf=ILF_Squid;
1029 ilf_count[ilf]++;
1030
1031 tnum=atoi(data);
1032 t=localtime(&tnum);
1033 if (t == NULL) {
1034 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1035 exit(EXIT_FAILURE);
1036 }
1037
1038 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1039
1040 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1041 }
1042 }
1043 if (ilf==ILF_Sarg) {
1044 getword_start(&gwarea,linebuf);
1045 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1046 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1047 exit(EXIT_FAILURE);
1048 }
1049 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1050 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1051 exit(EXIT_FAILURE);
1052 }
1053 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1054 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1055 exit(EXIT_FAILURE);
1056 }
1057 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1058 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1059 exit(EXIT_FAILURE);
1060 }
1061 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1062 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1063 exit(EXIT_FAILURE);
1064 }
1065 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1066 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1067 exit(EXIT_FAILURE);
1068 }
1069 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1070 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1071 exit(EXIT_FAILURE);
1072 }
1073 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1074 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1075 exit(EXIT_FAILURE);
1076 }
1077 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1078 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1079 exit(EXIT_FAILURE);
1080 }
1081 getword_start(&gwarea,data);
1082 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
1083 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1084 exit(EXIT_FAILURE);
1085 }
1086 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
1087 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1088 exit(EXIT_FAILURE);
1089 }
1090 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1091 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1092 exit(EXIT_FAILURE);
1093 }
1094 idata=builddia(iday,imonth,iyear);
1095 computedate(iyear,imonth,iday,&tt);
1096 t=&tt;
1097 }
1098 if (ilf==ILF_Isa) {
1099 if (linebuf[0] == '#') {
1100 int ncols,cols[ISACOL_Last];
1101
1102 fixendofline(linebuf);
1103 getword_start(&gwarea,linebuf);
1104 // remove the #Fields: column at the beginning of the line
1105 if (getword_skip(1000,&gwarea,' ')<0){
1106 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1107 exit(EXIT_FAILURE);
1108 }
1109 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1110 ncols=0;
1111 while(gwarea.current[0] != '\0') {
1112 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1113 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1114 exit(EXIT_FAILURE);
1115 }
1116 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1117 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1118 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1119 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1120 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1121 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1122 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1123 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1124 ncols++;
1125 }
1126 if (cols[ISACOL_Ip]>=0) {
1127 isa_ncols=ncols;
1128 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1129 isa_cols[ncols]=cols[ncols];
1130 }
1131 continue;
1132 }
1133 if (!isa_ncols) continue;
1134 getword_start(&gwarea,linebuf);
1135 for (x=0 ; x<isa_ncols ; x++) {
1136 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1137 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1138 exit(EXIT_FAILURE);
1139 }
1140 if (x==isa_cols[ISACOL_Ip]) {
1141 if (strlen(str)>=sizeof(ip)) {
1142 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1143 exit(EXIT_FAILURE);
1144 }
1145 strcpy(ip,str);
1146 } else if (x==isa_cols[ISACOL_UserName]) {
1147 if (strlen(str)>=sizeof(user)) {
1148 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1149 exit(EXIT_FAILURE);
1150 }
1151 strcpy(user,str);
1152 } else if (x==isa_cols[ISACOL_Date]) {
1153 if (strlen(str)>=sizeof(data)) {
1154 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1155 exit(EXIT_FAILURE);
1156 }
1157 strcpy(data,str);
1158 } else if (x==isa_cols[ISACOL_Time]) {
1159 if (strlen(str)>=sizeof(hora)) {
1160 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1161 exit(EXIT_FAILURE);
1162 }
1163 strcpy(hora,str);
1164 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1165 if (strlen(str)>=sizeof(elap)) {
1166 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1167 exit(EXIT_FAILURE);
1168 }
1169 strcpy(elap,str);
1170 } else if (x==isa_cols[ISACOL_Bytes]) {
1171 if (strlen(str)>=sizeof(tam)) {
1172 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1173 exit(EXIT_FAILURE);
1174 }
1175 strcpy(tam,str);
1176 } else if (x==isa_cols[ISACOL_Uri]) {
1177 url=str;
1178 } else if (x==isa_cols[ISACOL_Status]) {
1179 if (strlen(str)>=sizeof(code)) {
1180 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1181 exit(EXIT_FAILURE);
1182 }
1183 strcpy(code,str);
1184 }
1185 }
1186
1187 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1188 sprintf(val1,"DENIED/%s",code);
1189 strcpy(code,val1);
1190 }
1191 getword_start(&gwarea,data);
1192 if (getword_atoll(&iyear,&gwarea,'-')<0){
1193 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1194 exit(EXIT_FAILURE);
1195 }
1196 if (getword_atoll(&imonth,&gwarea,'-')<0){
1197 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1198 exit(EXIT_FAILURE);
1199 }
1200 if (getword_atoll(&iday,&gwarea,'\0')<0){
1201 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1202 exit(EXIT_FAILURE);
1203 }
1204
1205 idata=builddia(iday,imonth,iyear);
1206 computedate(iyear,imonth,iday,&tt);
1207 t=&tt;
1208 }
1209 if (t==NULL) {
1210 debuga(_("Unknown input log file format\n"));
1211 break;
1212 }
1213
1214 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1215 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1216
1217 if(debugm)
1218 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1219
1220 if(date[0] != '\0'){
1221 if(idata < dfrom || idata > duntil) continue;
1222 }
1223
1224 // Record only hours usage which is required
1225 if (t) {
1226 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1227 sizeof( int ), compar ) == NULL )
1228 continue;
1229
1230 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1231 sizeof( int ), compar ) == NULL )
1232 continue;
1233 }
1234
1235
1236 if(strlen(user) > MAX_USER_LEN) {
1237 if (debugm) printf(_("User ID too long: %s\n"),user);
1238 totregsx++;
1239 continue;
1240 }
1241
1242 // include_users
1243 if(IncludeUsers[0] != '\0') {
1244 sprintf(val1,":%s:",user);
1245 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1246 continue;
1247 }
1248
1249 if(vercode(code)) {
1250 if (debugm) printf(_("Excluded code: %s\n"),code);
1251 totregsx++;
1252 continue;
1253 }
1254
1255 if(testvaliduserchar(user))
1256 continue;
1257
1258 #if 0
1259 if((str = strstr(user,"%20")) != NULL) {
1260 /*
1261 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1262 the side effect is to truncate the name at the first space and merge the reports
1263 of people whose name is identical up to the first space.
1264
1265 The old code used to truncate the user name at the first % if a %20 was
1266 found anywhere in the string. That means the string could be truncated
1267 at the wrong place if another % occured before the %20. This new code should
1268 avoid that problem and only truncate at the space. There is no bug
1269 report indicating that anybody noticed this.
1270 */
1271 *str='\0';
1272 }
1273
1274 /*
1275 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1276 found in the user name.
1277 */
1278 while((str = strstr(user,"%5c")) != NULL) {
1279 *str='.';
1280 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1281 }
1282 #endif
1283
1284 urly=url;
1285
1286 if(ilf!=ILF_Sarg) {
1287 /*
1288 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1289 a downloaded file.
1290 */
1291 download_flag=is_download_suffix(url);
1292 if (download_flag) {
1293 download_url=url;
1294 download_count++;
1295 }
1296 } else
1297 download_flag=false;
1298
1299 // remove any protocol:// at the beginning of the URL
1300 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1301 int i;
1302
1303 str+=2;
1304 for (i=0 ; str[i] ; i++)
1305 url[i]=str[i];
1306 url[i]='\0';
1307 }
1308
1309 if(!LongUrl) {
1310 url_hostname(url,hostname,sizeof(hostname));
1311 url=hostname;
1312 }
1313
1314 if(url[0] == '\0') continue;
1315
1316 if(addr[0] != '\0'){
1317 if(strcmp(addr,ip)!=0) continue;
1318 }
1319 if(fhost) {
1320 if(!vhexclude(url)) {
1321 if (debugm) printf(_("Excluded site: %s\n"),url);
1322 totregsx++;
1323 continue;
1324 }
1325 }
1326
1327 if(hm[0] != '\0') {
1328 hmr[0]='\0';
1329 chm++;
1330 getword_start(&gwarea,hora);
1331 while(chm) {
1332 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1333 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1334 exit(EXIT_FAILURE);
1335 }
1336 strncat(hmr,warea,2);
1337 chm--;
1338 }
1339 strncat(hmr,gwarea.current,2);
1340
1341 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1342 }
1343
1344 if(site[0] != '\0'){
1345 if(strstr(url,site)==0) continue;
1346 }
1347
1348 if(UserIp) {
1349 strcpy(user,ip);
1350 id_is_ip=true;
1351 } else {
1352 id_is_ip=false;
1353 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1354 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1355 strcpy(user,ip);
1356 id_is_ip=true;
1357 }
1358 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1359 continue;
1360 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1361 strcpy(user,"everybody");
1362 } else {
1363 strlow(user);
1364 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1365 if((str = strchr(user,'_')) != 0) {
1366 strcpy(warea,str+1);
1367 strcpy(user,warea);
1368 }
1369 if((str = strchr(user,'+')) != 0) {
1370 strcpy(warea,str+1);
1371 strcpy(user,warea);
1372 }
1373 }
1374 }
1375 }
1376
1377 if(us[0] != '\0'){
1378 if(strcmp(user,us)!=0) continue;
1379 }
1380
1381 if(puser) {
1382 sprintf(wuser,":%s:",user);
1383 if(strstr(userfile, wuser) == 0)
1384 continue;
1385 }
1386
1387 if(fuser) {
1388 if(!vuexclude(user)) {
1389 if (debugm) printf(_("Excluded user: %s\n"),user);
1390 totregsx++;
1391 continue;
1392 }
1393 }
1394
1395 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1396 continue;
1397
1398 if(max_elapsed) {
1399 if(atol(elap)>max_elapsed) {
1400 elap[0]='0';
1401 elap[1]='\0';
1402 }
1403 }
1404
1405 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1406 fixendofline(str);
1407 sprintf(smartfilter,"\"%s\"",str+1);
1408 } else sprintf(smartfilter,"\"\"");
1409
1410 nopen=0;
1411 prev_ufile=NULL;
1412 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1413 prev_ufile=ufile;
1414 if (ufile->file) nopen++;
1415 }
1416 if (!ufile) {
1417 ufile=malloc(sizeof(*ufile));
1418 if (!ufile) {
1419 debuga(_("Not enough memory to store the user %s\n"),user);
1420 exit(EXIT_FAILURE);
1421 }
1422 memset(ufile,0,sizeof(*ufile));
1423 ufile->next=first_user_file;
1424 first_user_file=ufile;
1425 uinfo=userinfo_create(user);
1426 ufile->user=uinfo;
1427 uinfo->id_is_ip=id_is_ip;
1428 } else {
1429 if (prev_ufile) {
1430 prev_ufile->next=ufile->next;
1431 ufile->next=first_user_file;
1432 first_user_file=ufile;
1433 }
1434 }
1435
1436 if (ufile->file==NULL) {
1437 if (nopen>=maxopenfiles) {
1438 x=0;
1439 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1440 if (ufile1->file!=NULL) {
1441 if (x>=maxopenfiles) {
1442 if (fclose(ufile1->file)==EOF) {
1443 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1444 exit(EXIT_FAILURE);
1445 }
1446 ufile1->file=NULL;
1447 }
1448 x++;
1449 }
1450 }
1451 }
1452 if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1453 debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename);
1454 exit(EXIT_FAILURE);
1455 }
1456 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1457 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1458 exit (1);
1459 }
1460 }
1461
1462 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1463 if ( fp_Write_User )
1464 fclose( fp_Write_User ) ;
1465 sprintf (tmp3, "%s/%s.unsort", tmp, user);
1466
1467 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1468 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1469 exit (1);
1470 }
1471 strcpy( sz_Last_User , user ) ;
1472 }*/
1473 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1474 debuga(_("Write error in the log file of user %s\n"),user);
1475 exit(EXIT_FAILURE);
1476 }
1477
1478 if(fp_log && ilf!=ILF_Sarg)
1479 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1480
1481 totregsg++;
1482
1483 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1484 ndownload = 1;
1485
1486 if ( ! fp_Download_Unsort ) {
1487 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1488 debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno));
1489 exit (1);
1490 }
1491 }
1492 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1493 }
1494
1495 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1496 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1497 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1498 denied_count++;
1499 }
1500 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1501 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1502 authfail_count++;
1503 }
1504 }
1505
1506 if (ilf!=ILF_Sarg) {
1507 if(!totper || idata<mindate){
1508 mindate=idata;
1509 memcpy(&period.start,t,sizeof(*t));
1510 strcpy(start_hour,tbuf2);
1511 }
1512 if (!totper || idata>maxdate) {
1513 maxdate=idata;
1514 memcpy(&period.end,t,sizeof(*t));
1515 }
1516 totper=true;
1517 }
1518
1519 if(debugm){
1520 printf("IP=\t%s\n",ip);
1521 printf("USER=\t%s\n",user);
1522 printf("ELAP=\t%s\n",elap);
1523 printf("DATE=\t%s\n",dia);
1524 printf("TIME=\t%s\n",hora);
1525 printf("FUNC=\t%s\n",fun);
1526 printf("URL=\t%s\n",url);
1527 printf("CODE=\t%s\n",code);
1528 printf("LEN=\t%s\n",tam);
1529 }
1530 }
1531 if (!from_stdin) {
1532 if (from_pipe)
1533 pclose(fp_in);
1534 else
1535 fclose(fp_in);
1536 if( ShowReadStatistics )
1537 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1538 }
1539 }
1540
1541 if (debug)
1542 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1543
1544 longline_destroy(&line);
1545 if ( fp_Download_Unsort )
1546 fclose (fp_Download_Unsort);
1547
1548 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1549 ufile1=ufile->next;
1550 if (ufile->file!=NULL) fclose(ufile->file);
1551 free(ufile);
1552 }
1553
1554 free_download();
1555 free_excludecodes();
1556 free_exclude();
1557
1558 if(debug) {
1559 int totalcount=0;
1560
1561 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1562
1563 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1564 debuga(_("Log with mixed records format (squid and common log)\n"));
1565
1566 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1567 debuga(_("Common log format\n"));
1568
1569 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1570 debuga(_("Squid log format\n"));
1571
1572 if(ilf_count[ILF_Sarg]>0)
1573 debuga(_("Sarg log format\n"));
1574
1575 if(totalcount==0 && totregsg)
1576 debuga(_("Log with invalid format\n"));
1577 }
1578
1579 if(!totregsg){
1580 debuga(_("No records found\n"));
1581 debuga(_("End\n"));
1582 if(fp_denied) fclose(fp_denied);
1583 if(fp_authfail) fclose(fp_authfail);
1584 userinfo_free();
1585 if(userfile) free(userfile);
1586 close_usertab();
1587 exit(EXIT_SUCCESS);
1588 }
1589
1590 if (date[0]!='\0') {
1591 char date0[30], date1[30];
1592
1593 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1594 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1595 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1596 getperiod_fromrange(&period,dfrom,duntil);
1597 }
1598 if (getperiod_buildtext(&period)<0) {
1599 debuga(_("Failed to build the string representation of the date range\n"));
1600 exit(EXIT_FAILURE);
1601 }
1602
1603 if(debugz){
1604 debugaz("data",dia);
1605 debugaz("period",period.text);
1606 }
1607
1608 if(debug)
1609 debuga(_("Period: %s\n"),period.text);
1610
1611 // fclose(fp_ou);
1612 if(fp_denied)
1613 fclose(fp_denied);
1614 if(fp_authfail)
1615 fclose(fp_authfail);
1616
1617 if(fp_log != NULL) {
1618 fclose(fp_log);
1619 strcpy(end_hour,tbuf2);
1620 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1621 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1622 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1623 if (rename(arq_log,val4)) {
1624 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1625 } else {
1626 strcpy(arq_log,val4);
1627
1628 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1629 /*
1630 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1631 necessary around the command name, put them in the configuration file.
1632 */
1633 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1634 cstatus=system(val1);
1635 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1636 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1637 debuga(_("command: %s\n"),val1);
1638 exit(EXIT_FAILURE);
1639 }
1640 }
1641 }
1642 if(debug)
1643 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1644 }
1645
1646 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1647 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1648 cstatus=system(csort);
1649 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1650 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1651 debuga(_("sort command: %s\n"),csort);
1652 exit(EXIT_FAILURE);
1653 }
1654 unlink(denied_unsort);
1655 }
1656
1657 sort_users_log(tmp, debug);
1658
1659 if(DataFile[0] != '\0')
1660 data_file(tmp);
1661 else
1662 gerarel();
1663
1664 if((ReportType & REPORT_TYPE_DENIED) != 0)
1665 unlink(denied_sort);
1666
1667 if(strcmp(tmp,"/tmp") != 0) {
1668 unlinkdir(tmp,0);
1669 }
1670
1671 userinfo_free();
1672 if(userfile)
1673 free(userfile);
1674 close_usertab();
1675
1676 if(debug)
1677 debuga(_("End\n"));
1678
1679 exit(EXIT_SUCCESS);
1680
1681 }
1682
1683
1684 static void getusers(const char *pwdfile, int debug)
1685 {
1686
1687 FILE *fp_usr;
1688 char buf[255];
1689 char *str;
1690 long int nreg=0;
1691
1692 if(debug)
1693 debuga(_("Loading password file from %s\n"),pwdfile);
1694
1695 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1696 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1697 exit(EXIT_FAILURE);
1698 }
1699
1700 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1701 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1702 exit(EXIT_FAILURE);
1703 }
1704 nreg = ftell(fp_usr);
1705 if (nreg<0) {
1706 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1707 exit(EXIT_FAILURE);
1708 }
1709 nreg = nreg+5000;
1710 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1711 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1712 exit(EXIT_FAILURE);
1713 }
1714
1715 if((userfile=(char *) malloc(nreg))==NULL){
1716 debuga(_("malloc error (%ld)\n"),nreg);
1717 exit(EXIT_FAILURE);
1718 }
1719
1720 bzero(userfile,nreg);
1721 strcpy(userfile,":");
1722
1723 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1724 str=strchr(buf,':');
1725 if (!str) {
1726 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1727 exit(EXIT_FAILURE);
1728 }
1729 str[1]='\0';
1730 strcat(userfile,buf);
1731 }
1732
1733 fclose(fp_usr);
1734
1735 return;
1736 }