git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Add an option to make sarg generate the css template
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef HAVE_GETOPT_H
31 #include <getopt.h>
32 #endif
33
/* Number of log lines read between two refreshes of the "reading: x%" progress
 * message: main() loads this value into its OutputNonZero countdown, prints the
 * progress when the countdown reaches zero and then reloads it. */
34 #define REPORT_EVERY_X_LINES 5000
/* Value main() assigns to maxopenfiles before reading the logs.
 * NOTE(review): presumably the cap on simultaneously open per-user files;
 * the code using it is outside this part of the file - confirm. */
35 #define MAX_OPEN_USER_FILES 10
36
/* Node of a singly linked list that associates a user record with a stdio
 * stream. main() keeps the list head in first_user_file (initially NULL).
 * NOTE(review): presumably one node per per-user temporary file currently
 * open; the code that fills the list is not visible here - confirm. */
37 struct userfilestruct
38 {
39 struct userfilestruct *next; /* next node of the list */
40 struct userinfostruct *user; /* user record this node refers to (type declared in the project headers) */
41 FILE *file; /* stdio stream attached to that user */
42 };
43
/* File-local string pointer, initially NULL. The leading annotation marks it
 * as possibly null (presumably for the splint checker - confirm). Its users
 * are not visible in this part of the file. */
44 /*@null@*/static char *userfile=NULL;
45
/* Weekday numbers 0..6 followed by the element count 7. numlist is declared in
 * the project headers. NOTE(review): presumably the default "all days"
 * selection that the configuration may narrow - confirm. */
46 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
/* Hour numbers 0..23 followed by the element count 24; same layout as
 * weekdays above. */
47 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
48 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
49
/* Forward declaration of a file-local helper. main() calls it with PasswdFile
 * and the debug level when that file is readable; the definition is not
 * visible in this part of the file. */
50 static void getusers(const char *pwdfile, int debug);
51
52 int main(int argc,char *argv[])
53 {
54 enum isa_col_id {
55 ISACOL_Ip,
56 ISACOL_UserName,
57 ISACOL_Date,
58 ISACOL_Time,
59 ISACOL_TimeTaken,
60 ISACOL_Bytes,
61 ISACOL_Uri,
62 ISACOL_Status,
63 ISACOL_Last //last entry of the list !
64 };
65 enum InputLogFormat {
66 ILF_Unknown,
67 ILF_Squid,
68 ILF_Common,
69 ILF_Sarg,
70 ILF_Isa,
71 ILF_Last //last entry of the list !
72 };
73
74 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
75
76 char sz_Download_Unsort[ 20000 ] ;
77 FILE * fp_Download_Unsort = NULL ;
78
79 extern int optind;
80 extern int optopt;
81 extern char *optarg;
82
83 char data[255];
84 char elap[255];
85 char ip[MAXLEN];
86 char tam[255];
87 char fun[MAXLEN];
88 char wuser[MAXLEN];
89 char smartfilter[MAXLEN];
90 char dia[128];
91 char mes[30];
92 char ano[30];
93 char hora[30];
94 char wtemp[MAXLEN];
95 char wtemp2[255];
96 char date[255];
97 char arq[255];
98 char arq_log[255];
99 char hm[15], hmf[15], hmr[15];
100 int chm=0;
101 char uagent[MAXLEN];
102 char hexclude[MAXLEN];
103 char csort[MAXLEN];
104 int cstatus;
105 char tbuf2[128];
106 char zip[20];
107 char *str;
108 char tmp2[MAXLEN];
109 char tmp3[MAXLEN];
110 char denied_unsort[MAXLEN];
111 char denied_sort[MAXLEN];
112 char authfail_unsort[MAXLEN];
113 char start_hour[128];
114 char end_hour[128];
115 char *linebuf;
116 char hostname[512];
117 char *url;
118 char *urly;
119 char user[MAX_USER_LEN];
120 enum InputLogFormat ilf;
121 int ilf_count[ILF_Last];
122 int ch;
123 int x;
124 int errflg=0;
125 int puser=0;
126 bool fhost=false;
127 bool dns=false;
128 bool fuser=false;
129 int idata=0;
130 int mindate=0;
131 int maxdate=0;
132 int iarq=0;
133 int isa_ncols=0,isa_cols[ISACOL_Last];
134 bool from_stdin;
135 bool from_pipe;
136 int blen;
137 int maxopenfiles;
138 int nopen;
139 bool id_is_ip;
140 long totregsl=0;
141 long totregsg=0;
142 long totregsx=0;
143 bool totper=false;
144 long int max_elapsed=0;
145 long long int iyear, imonth, iday;
146 bool realt;
147 bool userip;
148 struct tm tt;
149 struct tm *t;
150 unsigned long recs1=0UL;
151 unsigned long recs2=0UL;
152 int OutputNonZero = REPORT_EVERY_X_LINES ;
153 bool download_flag=false;
154 char *download_url=NULL;
155 struct getwordstruct gwarea;
156 longline line;
157 time_t tnum;
158 struct stat logstat;
159 struct userinfostruct *uinfo;
160 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
161 static int split=0;
162 static int convert=0;
163 static int output_css=0;
164 int option_index;
165 static struct option long_options[]=
166 {
167 {"convert",no_argument,&convert,1},
168 {"css",no_argument,&output_css,1},
169 {"split",no_argument,&split,1},
170 {0,0,0,0}
171 };
172
173 #ifdef HAVE_LOCALE_H
174 setlocale(LC_TIME,"");
175 #endif
176
177 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
178 if (!setlocale (LC_ALL, "")) {
179 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
180 exit(EXIT_FAILURE);
181 }
182 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
183 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
184 exit(EXIT_FAILURE);
185 }
186 if (!textdomain (PACKAGE_NAME)) {
187 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
188 exit(EXIT_FAILURE);
189 }
190 #endif //ENABLE_NLS
191
192 BgImage[0]='\0';
193 LogoImage[0]='\0';
194 LogoText[0]='\0';
195 PasswdFile[0]='\0';
196 OutputEmail[0]='\0';
197 UserAgentLog[0]='\0';
198 ExcludeHosts[0]='\0';
199 ExcludeUsers[0]='\0';
200 ConfigFile[0]='\0';
201 code[0]='\0';
202 LastLog=0;
203 ReportType=0UL;
204 UserTabFile[0]='\0';
205 BlockIt[0]='\0';
206 ExternalCSSFile[0]='\0';
207 RedirectorLogFormat[0]='\0';
208 NRedirectorLogs=0;
209 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
210
211 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
212 strcpy(GraphDaysBytesBarColor,"orange");
213 strcpy(BgColor,"#ffffff");
214 strcpy(TxColor,"#000000");
215 strcpy(TxBgColor,"lavender");
216 strcpy(TiColor,"darkblue");
217 strcpy(Width,"80");
218 strcpy(Height,"45");
219 strcpy(LogoTextColor,"#000000");
220 strcpy(HeaderColor,"darkblue");
221 strcpy(HeaderBgColor,"#dddddd");
222 strcpy(LogoTextColor,"#006699");
223 strcpy(FontSize,"9px");
224 strcpy(TempDir,"/tmp");
225 strcpy(OutputDir,"/var/www/html/squid-reports");
226 Ip2Name=false;
227 strcpy(DateFormat,"u");
228 OverwriteReport=false;
229 RemoveTempFiles=true;
230 strcpy(ReplaceIndex,"index.html");
231 Index=INDEX_YES;
232 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
233 UseComma=0;
234 strcpy(MailUtility,"mailx");
235 TopSitesNum=100;
236 TopUsersNum=0;
237 UserIp=0;
238 strcpy(TopuserSortField,"BYTES");
239 strcpy(UserSortField,"BYTES");
240 strcpy(TopuserSortOrder,"reverse");
241 strcpy(UserSortOrder,"reverse");
242 strcpy(TopsitesSortField,"CONNECT");
243 strcpy(TopsitesSortType,"D");
244 LongUrl=0;
245 strcpy(FontFace,"Verdana,Tahoma,Arial");
246 datetimeby=DATETIME_BYTE;
247 strcpy(CharSet,"ISO-8859-1");
248 Privacy=0;
249 strcpy(PrivacyString,"***.***.***.***");
250 strcpy(PrivacyStringColor,"blue");
251 SuccessfulMsg=true;
252 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
253 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
254 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
255 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
256 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
257 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
258 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
259 strcpy(DataFileDelimiter,";");
260 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
261 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
262 ShowReadStatistics=true;
263 strcpy(IndexSortOrder,"D");
264 ShowSargInfo=true;
265 ShowSargLogo=true;
266 strcpy(ParsedOutputLog,"no");
267 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
268 DisplayedValues=DISPLAY_ABBREV;
269 strcpy(HeaderFontSize,"9px");
270 strcpy(TitleFontSize,"11px");
271 strcpy(AuthUserTemplateFile,"sarg_htaccess");
272 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
273 Graphs=true;
274 #if defined(FONTDIR)
275 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
276 #else
277 GraphFont[0]='\0';
278 #endif
279 strcpy(Ulimit,"20000");
280 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
281 IndexTree=INDEX_TREE_FILE;
282 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
283 RealtimeUnauthRec=REALTIME_UNAUTH_REC_SHOW;
284 RedirectorIgnoreDate=false;
285 DansguardianIgnoreDate=false;
286 DataFileUrl=DATAFILEURL_IP;
287 strcpy(MaxElapsed,"28800000");
288 BytesInSitesUsersReport=0;
289 UserAuthentication=0;
290 strcpy(LDAPHost,"127.0.0.1");
291 LDAPPort=389;
292 LDAPProtocolVersion=3;
293 LDAPBindDN[0]='\0';
294 LDAPBindPW[0]='\0';
295 LDAPBaseSearch[0]='\0';
296 strcpy(LDAPFilterSearch, "uid=%s");
297 strcpy(LDAPTargetAttr, "cn");
298
299 dia[0]='\0';
300 mes[0]='\0';
301 ano[0]='\0';
302 hora[0]='\0';
303 tmp[0]='\0';
304 tmp2[0]='\0';
305 tmp3[0]='\0';
306 wtemp[0]='\0';
307 wtemp2[0]='\0';
308 us[0]='\0';
309 date[0]='\0';
310 df[0]='\0';
311 uagent[0]='\0';
312 hexclude[0]='\0';
313 addr[0]='\0';
314 hm[0]='\0';
315 hmf[0]='\0';
316 site[0]='\0';
317 outdir[0]='\0';
318 elap[0]='\0';
319 email[0]='\0';
320 zip[0]='\0';
321 UserInvalidChar[0]='\0';
322 DataFile[0]='\0';
323 SquidGuardConf[0]='\0';
324 DansGuardianConf[0]='\0';
325 start_hour[0]='\0';
326 end_hour[0]='\0';
327
328 denied_count=0;
329 download_count=0;
330 authfail_count=0;
331 dansguardian_count=0;
332 squidguard_count=0;
333 useragent_count=0;
334 DeniedReportLimit=10;
335 AuthfailReportLimit=10;
336 DansGuardianReportLimit=10;
337 SquidGuardReportLimit=10;
338 DownloadReportLimit=50;
339 UserReportLimit=0;
340 debug=0;
341 debugz=0;
342 debugm=0;
343 iprel=false;
344 userip=false;
345 realt=false;
346 realtime_refresh=3;
347 realtime_access_log_lines=1000;
348 cost=0.01;
349 nocost=50000000;
350 ndownload=0;
351 squid24=false;
352 dfrom=0;
353 duntil=0;
354
355 bzero(IncludeUsers, sizeof(IncludeUsers));
356 bzero(ExcludeString, sizeof(ExcludeString));
357 first_user_file=NULL;
358 memset(&period,0,sizeof(period));
359
360 NAccessLog=0;
361 for(x=0; x<MAXLOGS; x++)
362 AccessLog[x][0]='\0';
363 AccessLogFromCmdLine=0;
364 RedirectorLogFromCmdLine=0;
365
366 strcpy(Title,_("Squid User Access Report"));
367
368 while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
369 switch(ch)
370 {
371 case 0:
372 break;
373 case 'a':
374 strcpy(addr,optarg);
375 break;
376 case 'b':
377 strcpy(uagent,optarg);
378 break;
379 case 'c':
380 strcpy(hexclude,optarg);
381 break;
382 case 'd':
383 strncpy(date,optarg,sizeof(date)-1);
384 date[sizeof(date)-1]='\0';
385 date_from(date, &dfrom, &duntil);
386 break;
387 case 'e':
388 strcpy(email,optarg);
389 break;
390 case 'f':
391 strcpy(ConfigFile,optarg);
392 break;
393 case 'g':
394 strcpy(df,optarg);
395 break;
396 case 'h':
397 usage(argv[0]);
398 exit(EXIT_SUCCESS);
399 case 'i':
400 iprel=true;
401 break;
402 case 'l':
403 if (NAccessLog>=MAXLOGS) {
404 debuga(_("Too many log files passed on command line with option -l.\n"));
405 exit(EXIT_FAILURE);
406 }
407 if (strlen(optarg)>=MAX_LOG_FILELEN) {
408 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
409 exit(EXIT_FAILURE);
410 }
411 strcpy(AccessLog[NAccessLog],optarg);
412 NAccessLog++;
413 AccessLogFromCmdLine++;
414 break;
415 case 'L':
416 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
417 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
418 exit(EXIT_FAILURE);
419 }
420 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
421 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
422 exit(EXIT_FAILURE);
423 }
424 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
425 NRedirectorLogs++;
426 RedirectorLogFromCmdLine++;
427 break;
428 case 'm':
429 debugm++;
430 break;
431 case 'n':
432 dns=true;
433 break;
434 case 'o':
435 strcpy(outdir,optarg);
436 break;
437 case 'p':
438 userip=true;
439 break;
440 case 'r':
441 realt=true;
442 break;
443 case 's':
444 strcpy(site,optarg);
445 break;
446 case 't':
447 {
448 int h,m;
449
450 if(strstr(optarg,"-") == 0) {
451 strcpy(hm,optarg);
452 strcpy(hmf,optarg);
453 } else {
454 getword_start(&gwarea,optarg);
455 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
456 debuga(_("The time range passed on the command line with option -t is invalid\n"));
457 exit(EXIT_FAILURE);
458 }
459 }
460 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
461 debuga(_("Time period must be MM or MM:SS. Exit\n"));
462 exit(EXIT_FAILURE);
463 }
464 sprintf(hm,"%02d%02d",h,m);
465 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
466 debuga(_("Time period must be MM or MM:SS. Exit\n"));
467 exit(EXIT_FAILURE);
468 }
469 sprintf(hmf,"%02d%02d",h,m);
470 break;
471 }
472 case 'u':
473 strcpy(us,optarg);
474 break;
475 case 'v':
476 version();
477 break;
478 case 'w':
479 strcpy(tmp,optarg);
480 break;
481 case 'x':
482 debug++;
483 break;
484 case 'y':
485 langcode++;
486 break;
487 case 'z':
488 debugz++;
489 break;
490 /*case ':':
491 debuga(_("Option -%c require an argument\n"),optopt);
492 errflg++;
493 break;*/
494 case '?':
495 usage(argv[0]);
496 exit(EXIT_FAILURE);
497 default:
498 abort();
499 }
500 }
501
502 if (errflg>0) {
503 usage(argv[0]);
504 exit(2);
505 }
506
507 if(debug) debuga(_("Init\n"));
508
509 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
510 if(access(ConfigFile, R_OK) != 0) {
511 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
512 exit(EXIT_FAILURE);
513 }
514
515 if(access(ConfigFile, R_OK) == 0)
516 getconf();
517
518 if(userip) UserIp=true;
519
520 if(dns) Ip2Name=true;
521
522 if(realt) {
523 realtime();
524 exit(EXIT_SUCCESS);
525 }
526
527 if(IndexTree == INDEX_TREE_FILE)
528 strcpy(ImageFile,"../images");
529 else
530 strcpy(ImageFile,"../../../images");
531
532 dataonly=0;
533 if(DataFile[0] != '\0')
534 dataonly++;
535
536 if(NAccessLog == 0) {
537 strcpy(AccessLog[0],"/var/log/squid/access.log");
538 NAccessLog++;
539 }
540
541 if(output_css) {
542 css_content(stdout);
543 exit(EXIT_SUCCESS);
544 }
545 if(split) {
546 splitlog(AccessLog[0], df, dfrom, duntil, convert);
547 exit(EXIT_SUCCESS);
548 }
549 if(convert) {
550 convlog(AccessLog[0], df, dfrom, duntil);
551 exit(EXIT_SUCCESS);
552 }
553
554 load_excludecodes(ExcludeCodes);
555
556 if(access(PasswdFile, R_OK) == 0) {
557 getusers(PasswdFile,debug);
558 puser++;
559 }
560
561 if(hexclude[0] == '\0')
562 strcpy(hexclude,ExcludeHosts);
563 if(hexclude[0] != '\0') {
564 gethexclude(hexclude,debug);
565 fhost=true;
566 }
567
568 if(ReportType == 0) {
569 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
570 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
571 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
572 }
573
574 if(access(ExcludeUsers, R_OK) == 0) {
575 getuexclude(ExcludeUsers,debug);
576 fuser=true;
577 }
578
579 indexonly=0;
580 if(fuser) {
581 if(is_indexonly())
582 indexonly++;
583 }
584 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
585 if(Index == INDEX_ONLY) indexonly++;
586
587 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
588
589 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
590 strcat(outdir,"/");
591
592 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
593
594 if(tmp[0] == '\0') strcpy(tmp,TempDir);
595 else strcpy(TempDir,tmp);
596
597 if(df[0] == '\0') strcpy(df,DateFormat);
598 else strcpy(DateFormat,df);
599
600 if(df[0] == '\0') {
601 strcpy(df,"u");
602 strcpy(DateFormat,"u");
603 }
604 if (df[0]=='w')
605 IndexTree=INDEX_TREE_FILE;
606
607 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
608
609 strcpy(tmp2,tmp);
610
611 if(email[0] != '\0') {
612 snprintf(wtemp2,sizeof(wtemp2),"%s/sarg",tmp2);
613 my_mkdir(wtemp2);
614 strcat(tmp2,"/sarg");
615 strcpy(outdir,tmp2);
616 strcat(outdir,"/");
617 }
618
619 strcat(tmp2,"/sarg.log");
620
621 sprintf(tmp3,"%s/sarg",tmp);
622 if(access(tmp3, R_OK) == 0) {
623 unlinkdir(tmp3,1);
624 }
625 my_mkdir(tmp3);
626 strcpy(denied_unsort,tmp3);
627 strcpy(denied_sort,tmp3);
628 strcpy(authfail_unsort,tmp3);
629 strcat(denied_unsort,"/denied.log.unsort");
630 strcat(denied_sort,"/denied.log");
631 strcat(authfail_unsort,"/authfail.log.unsort");
632
633 if(debug) {
634 debuga(_("Parameters:\n"));
635 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
636 debuga(_(" Useragent log (-b) = %s\n"),uagent);
637 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
638 debuga(_(" Date from-until (-d) = %s\n"),date);
639 debuga(_(" Email address to send reports (-e) = %s\n"),email);
640 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
641 if(strcmp(df,"e") == 0)
642 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
643 if(strcmp(df,"u") == 0)
644 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
645 if(strcmp(df,"w") == 0)
646 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
647 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
648 for (iarq=0 ; iarq<NAccessLog ; iarq++)
649 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
650 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
651 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
652 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
653 debuga(_(" Output dir (-o) = %s\n"),outdir);
654 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
655 debuga(_(" Accessed site (-s) = %s\n"),site);
656 debuga(_(" Time (-t) = %s\n"),hm);
657 debuga(_(" User (-u) = %s\n"),us);
658 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
659 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
660 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
661 debuga("\n");
662 }
663
664 if(debugm) {
665 printf(_("Parameters:\n"));
666 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
667 printf(_(" Useragent log (-b) = %s\n"),uagent);
668 printf(_(" Exclude file (-c) = %s\n"),hexclude);
669 printf(_(" Date from-until (-d) = %s\n"),date);
670 printf(_(" Email address to send reports (-e) = %s\n"),email);
671 printf(_(" Config file (-f) = %s\n"),ConfigFile);
672 if(strcmp(df,"e") == 0)
673 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
674 if(strcmp(df,"u") == 0)
675 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
676 if(strcmp(df,"w") == 0)
677 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
678 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
679 for (iarq=0 ; iarq<NAccessLog ; iarq++)
680 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
681 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
682 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
683 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
684 printf(_(" Output dir (-o) = %s\n"),outdir);
685 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
686 printf(_(" Accessed site (-s) = %s\n"),site);
687 printf(_(" Time (-t) = %s\n"),hm);
688 printf(_(" User (-u) = %s\n"),us);
689 printf(_(" Temporary dir (-w) = %s\n"),tmp);
690 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
691 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
692 printf(_("sarg version: %s\n"),VERSION);
693 }
694
695 if(debug)
696 debuga(_("sarg version: %s\n"),VERSION);
697
698 maxopenfiles=MAX_OPEN_USER_FILES;
699 #ifdef HAVE_RLIM_T
700 if (Ulimit[0] != '\0') {
701 struct rlimit rl;
702 long l1, l2;
703 int rc=0;
704
705 #if defined(RLIMIT_NOFILE)
706 getrlimit (RLIMIT_NOFILE, &rl);
707 #elif defined(RLIMIT_OFILE)
708 getrlimit (RLIMIT_OFILE, &rl);
709 #else
710 #warning "No rlimit resource for the number of open files"
711 #endif
712 l1 = rl.rlim_cur;
713 l2 = rl.rlim_max;
714
715 rl.rlim_cur = atol(Ulimit);
716 rl.rlim_max = atol(Ulimit);
717 #if defined(RLIMIT_NOFILE)
718 rc=setrlimit (RLIMIT_NOFILE, &rl);
719 #elif defined(RLIMIT_OFILE)
720 rc=setrlimit (RLIMIT_OFILE, &rl);
721 #else
722 #warning "No rlimit resource for the number of open files"
723 #endif
724 if(rc == -1) {
725 debuga(_("setrlimit error - %s\n"),strerror(errno));
726 }
727
728 if(debug)
729 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
730 }
731 #endif
732
733 init_usertab(UserTabFile);
734
735 if ((line=longline_create())==NULL) {
736 debuga(_("Not enough memory to read a log file\n"));
737 exit(EXIT_FAILURE);
738 }
739
740 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/sarg/download.unsort", tmp);
741
742 if(DataFile[0]=='\0') {
743 if((ReportType & REPORT_TYPE_DENIED) != 0) {
744 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
745 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
746 exit(EXIT_FAILURE);
747 }
748 }
749
750 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
751 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
752 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
753 exit(EXIT_FAILURE);
754 }
755 }
756 }
757
758 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
759 strcpy(arq,AccessLog[iarq]);
760
761 strcpy(arqtt,arq);
762
763 if(strcmp(arq,"-")==0) {
764 if(debug)
765 debuga(_("Reading access log file: from stdin\n"));
766 fp_in=stdin;
767 from_stdin=true;
768 } else {
769 if (date[0]!='\0') {
770 if (stat(arq,&logstat)!=0) {
771 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
772 } else {
773 struct tm *logtime=localtime(&logstat.st_mtime);
774 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
775 debuga(_("Ignoring old log file %s\n"),arq);
776 continue;
777 }
778 }
779 }
780 fp_in=decomp(arq,&from_pipe);
781 if(fp_in==NULL) {
782 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
783 exit(EXIT_FAILURE);
784 }
785 if(debug) debuga(_("Reading access log file: %s\n"),arq);
786 from_stdin=false;
787 }
788 ilf=ILF_Unknown;
789 download_flag=false;
790 // pre-read the file only if we have to show stats
791 if(ShowReadStatistics && !from_stdin && !from_pipe) {
792 size_t nread,i;
793 bool skipcr=false;
794 char tmp4[MAXLEN];
795
796 recs1=0UL;
797 recs2=0UL;
798
799 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
800 for (i=0 ; i<nread ; i++)
801 if (skipcr) {
802 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
803 skipcr=false;
804 }
805 } else {
806 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
807 skipcr=true;
808 recs1++;
809 }
810 }
811 }
812 rewind(fp_in);
813 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
814 putchar('\r');
815 fflush( stdout ) ;
816 }
817
818 longline_reset(line);
819
820 while ((linebuf=longline_read(fp_in,line))!=NULL) {
821 blen=strlen(linebuf);
822
823 if (ilf==ILF_Unknown) {
824 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
825 fixendofline(linebuf);
826 if (debug)
827 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
828 ilf=ILF_Isa;
829 ilf_count[ilf]++;
830 continue;
831 }
832
833 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
834 if (getperiod_fromsarglog(arqtt,&period)<0) {
835 debuga(_("The name of the file is invalid: %s\n"),arq);
836 exit(EXIT_FAILURE);
837 }
838 ilf=ILF_Sarg;
839 ilf_count[ilf]++;
840 continue;
841 }
842 }
843
844 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
845 if(access(ParsedOutputLog,R_OK) != 0) {
846 my_mkdir(ParsedOutputLog);
847 }
848 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
849 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
850 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
851 exit(EXIT_FAILURE);
852 }
853 fputs("*** SARG Log ***\n",fp_log);
854 }
855
856 recs2++;
857 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
858 double perc = recs2 * 100. / recs1 ;
859 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
860 putchar('\r');
861 fflush (stdout);
862 OutputNonZero = REPORT_EVERY_X_LINES ;
863 }
864 if(blen < 58) continue;
865 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
866 if(strstr(linebuf,"logfile turned over") != 0) continue;
867 if(linebuf[0] == ' ') continue;
868
869 // exclude_string
870 if(ExcludeString[0] != '\0') {
871 bool exstring=false;
872 getword_start(&gwarea,ExcludeString);
873 while(strchr(gwarea.current,':') != 0) {
874 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
875 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
876 exit(EXIT_FAILURE);
877 }
878 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
879 exstring=true;
880 break;
881 }
882 }
883 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
884 exstring=true;
885 if(exstring) continue;
886 }
887
888 totregsl++;
889 if(debugm)
890 printf("BUF=%s\n",linebuf);
891
892 t=NULL;
893 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
894 getword_start(&gwarea,linebuf);
895 if (getword(data,sizeof(data),&gwarea,' ')<0) {
896 debuga(_("Maybe you have a broken time in your access.log file\n"));
897 exit(EXIT_FAILURE);
898 }
899 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
900 strcpy(ip,data);
901 strcpy(elap,"0");
902 if(squid24) {
903 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
904 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
905 exit(EXIT_FAILURE);
906 }
907 } else {
908 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
909 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
910 exit(EXIT_FAILURE);
911 }
912 }
913 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
914 getword(fun,sizeof(fun),&gwarea,' ')<0) {
915 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
916 exit(EXIT_FAILURE);
917 }
918 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
919 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
920 exit(EXIT_FAILURE);
921 }
922 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
923 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
924 exit(EXIT_FAILURE);
925 }
926 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
927 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
928 exit(EXIT_FAILURE);
929 }
930 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
931 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
932 exit(EXIT_FAILURE);
933 }
934 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
935 if (getword(code,sizeof(code),&gwarea,' ')<0) {
936 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
937 exit(EXIT_FAILURE);
938 }
939 } else {
940 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
941 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
942 exit(EXIT_FAILURE);
943 }
944 }
945
946 if ((str = strchr(code, ':')) != NULL)
947 *str = '/';
948
949 if(strcmp(tam,"\0") == 0)
950 strcpy(tam,"0");
951
952 ilf=ILF_Common;
953 ilf_count[ilf]++;
954
955 getword_start(&gwarea,data+1);
956 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
957 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
958 exit(EXIT_FAILURE);
959 }
960 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
961 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
962 exit(EXIT_FAILURE);
963 }
964 getword_start(&gwarea,data);
965 if (getword_atoll(&iday,&gwarea,'/')<0){
966 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
967 exit(EXIT_FAILURE);
968 }
969 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
970 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
971 exit(EXIT_FAILURE);
972 }
973 if (getword_atoll(&iyear,&gwarea,'/')<0){
974 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
975 exit(EXIT_FAILURE);
976 }
977
978 imonth=month2num(mes)+1;
979 idata=builddia(iday,imonth,iyear);
980 computedate(iyear,imonth,iday,&tt);
981 t=&tt;
982 }
983
984 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
985 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
986 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
987 exit(EXIT_FAILURE);
988 }
989 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
990 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
991 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
992 exit(EXIT_FAILURE);
993 }
994 if(strlen(elap) < 1) continue;
995 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
996 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
997 exit(EXIT_FAILURE);
998 }
999 if (getword(code,sizeof(code),&gwarea,' ')<0){
1000 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
1001 exit(EXIT_FAILURE);
1002 }
1003 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
1004 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
1005 exit(EXIT_FAILURE);
1006 }
1007 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
1008 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
1009 exit(EXIT_FAILURE);
1010 }
1011 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
1012 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
1013 exit(EXIT_FAILURE);
1014 }
1015 if (getword(user,sizeof(user),&gwarea,' ')<0){
1016 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1017 exit(EXIT_FAILURE);
1018 }
1019 ilf=ILF_Squid;
1020 ilf_count[ilf]++;
1021
1022 tnum=atoi(data);
1023 t=localtime(&tnum);
1024 if (t == NULL) {
1025 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1026 exit(EXIT_FAILURE);
1027 }
1028
1029 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1030
1031 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1032 }
1033 }
1034 if (ilf==ILF_Sarg) {
1035 getword_start(&gwarea,linebuf);
1036 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1037 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1038 exit(EXIT_FAILURE);
1039 }
1040 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1041 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1042 exit(EXIT_FAILURE);
1043 }
1044 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1045 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1046 exit(EXIT_FAILURE);
1047 }
1048 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1049 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1050 exit(EXIT_FAILURE);
1051 }
1052 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1053 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1054 exit(EXIT_FAILURE);
1055 }
1056 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1057 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1058 exit(EXIT_FAILURE);
1059 }
1060 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1061 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1062 exit(EXIT_FAILURE);
1063 }
1064 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1065 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1066 exit(EXIT_FAILURE);
1067 }
1068 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1069 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1070 exit(EXIT_FAILURE);
1071 }
1072 getword_start(&gwarea,data);
1073 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
1074 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1075 exit(EXIT_FAILURE);
1076 }
1077 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
1078 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1079 exit(EXIT_FAILURE);
1080 }
1081 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1082 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1083 exit(EXIT_FAILURE);
1084 }
1085 idata=builddia(iday,imonth,iyear);
1086 computedate(iyear,imonth,iday,&tt);
1087 t=&tt;
1088 }
1089 if (ilf==ILF_Isa) {
1090 if (linebuf[0] == '#') {
1091 int ncols,cols[ISACOL_Last];
1092
1093 fixendofline(linebuf);
1094 getword_start(&gwarea,linebuf);
1095 // remove the #Fields: column at the beginning of the line
1096 if (getword_skip(1000,&gwarea,' ')<0){
1097 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1098 exit(EXIT_FAILURE);
1099 }
1100 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1101 ncols=0;
1102 while(gwarea.current[0] != '\0') {
1103 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1104 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1105 exit(EXIT_FAILURE);
1106 }
1107 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1108 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1109 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1110 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1111 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1112 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1113 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1114 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1115 ncols++;
1116 }
1117 if (cols[ISACOL_Ip]>=0) {
1118 isa_ncols=ncols;
1119 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1120 isa_cols[ncols]=cols[ncols];
1121 }
1122 continue;
1123 }
1124 if (!isa_ncols) continue;
1125 getword_start(&gwarea,linebuf);
1126 for (x=0 ; x<isa_ncols ; x++) {
1127 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1128 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1129 exit(EXIT_FAILURE);
1130 }
1131 if (x==isa_cols[ISACOL_Ip]) {
1132 if (strlen(str)>=sizeof(ip)) {
1133 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1134 exit(EXIT_FAILURE);
1135 }
1136 strcpy(ip,str);
1137 } else if (x==isa_cols[ISACOL_UserName]) {
1138 if (strlen(str)>=sizeof(user)) {
1139 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1140 exit(EXIT_FAILURE);
1141 }
1142 strcpy(user,str);
1143 } else if (x==isa_cols[ISACOL_Date]) {
1144 if (strlen(str)>=sizeof(data)) {
1145 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1146 exit(EXIT_FAILURE);
1147 }
1148 strcpy(data,str);
1149 } else if (x==isa_cols[ISACOL_Time]) {
1150 if (strlen(str)>=sizeof(hora)) {
1151 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1152 exit(EXIT_FAILURE);
1153 }
1154 strcpy(hora,str);
1155 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1156 if (strlen(str)>=sizeof(elap)) {
1157 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1158 exit(EXIT_FAILURE);
1159 }
1160 strcpy(elap,str);
1161 } else if (x==isa_cols[ISACOL_Bytes]) {
1162 if (strlen(str)>=sizeof(tam)) {
1163 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1164 exit(EXIT_FAILURE);
1165 }
1166 strcpy(tam,str);
1167 } else if (x==isa_cols[ISACOL_Uri]) {
1168 url=str;
1169 } else if (x==isa_cols[ISACOL_Status]) {
1170 if (strlen(str)>=sizeof(code)) {
1171 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1172 exit(EXIT_FAILURE);
1173 }
1174 strcpy(code,str);
1175 }
1176 }
1177
1178 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1179 sprintf(val1,"DENIED/%s",code);
1180 strcpy(code,val1);
1181 }
1182 getword_start(&gwarea,data);
1183 if (getword_atoll(&iyear,&gwarea,'-')<0){
1184 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1185 exit(EXIT_FAILURE);
1186 }
1187 if (getword_atoll(&imonth,&gwarea,'-')<0){
1188 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1189 exit(EXIT_FAILURE);
1190 }
1191 if (getword_atoll(&iday,&gwarea,'\0')<0){
1192 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1193 exit(EXIT_FAILURE);
1194 }
1195
1196 idata=builddia(iday,imonth,iyear);
1197 computedate(iyear,imonth,iday,&tt);
1198 t=&tt;
1199 }
1200 if (t==NULL) {
1201 debuga(_("Unknown input log file format\n"));
1202 break;
1203 }
1204
1205 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1206 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1207
1208 if(debugm)
1209 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1210
1211 if(date[0] != '\0'){
1212 if(idata < dfrom || idata > duntil) continue;
1213 }
1214
1215 // Record only hours usage which is required
1216 if (t) {
1217 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1218 sizeof( int ), compar ) == NULL )
1219 continue;
1220
1221 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1222 sizeof( int ), compar ) == NULL )
1223 continue;
1224 }
1225
1226
1227 if(strlen(user) > MAX_USER_LEN) {
1228 if (debugm) printf(_("User ID too long: %s\n"),user);
1229 totregsx++;
1230 continue;
1231 }
1232
1233 // include_users
1234 if(IncludeUsers[0] != '\0') {
1235 sprintf(val1,":%s:",user);
1236 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1237 continue;
1238 }
1239
1240 if(vercode(code)) {
1241 if (debugm) printf(_("Excluded code: %s\n"),code);
1242 totregsx++;
1243 continue;
1244 }
1245
1246 if(testvaliduserchar(user))
1247 continue;
1248
1249 #if 0
1250 if((str = strstr(user,"%20")) != NULL) {
1251 /*
1252 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1253 the side effect is to truncate the name at the first space and merge the reports
1254 of people whose name is identical up to the first space.
1255
1256 The old code used to truncate the user name at the first % if a %20 was
1257 found anywhere in the string. That means the string could be truncated
1258 at the wrong place if another % occured before the %20. This new code should
1259 avoid that problem and only truncate at the space. There is no bug
1260 report indicating that anybody noticed this.
1261 */
1262 *str='\0';
1263 }
1264
1265 /*
1266 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1267 found in the user name.
1268 */
1269 while((str = strstr(user,"%5c")) != NULL) {
1270 *str='.';
1271 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1272 }
1273 #endif
1274
1275 urly=url;
1276
1277 if(ilf!=ILF_Sarg) {
1278 /*
1279 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1280 a downloaded file.
1281 */
1282 download_flag=is_download_suffix(url);
1283 if (download_flag) {
1284 download_url=url;
1285 download_count++;
1286 }
1287 } else
1288 download_flag=false;
1289
1290 // remove any protocol:// at the beginning of the URL
1291 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1292 int i;
1293
1294 str+=2;
1295 for (i=0 ; str[i] ; i++)
1296 url[i]=str[i];
1297 url[i]='\0';
1298 }
1299
1300 if(!LongUrl) {
1301 url_hostname(url,hostname,sizeof(hostname));
1302 url=hostname;
1303 }
1304
1305 if(url[0] == '\0') continue;
1306
1307 if(addr[0] != '\0'){
1308 if(strcmp(addr,ip)!=0) continue;
1309 }
1310 if(fhost) {
1311 if(!vhexclude(url)) {
1312 if (debugm) printf(_("Excluded site: %s\n"),url);
1313 totregsx++;
1314 continue;
1315 }
1316 }
1317
1318 if(hm[0] != '\0') {
1319 hmr[0]='\0';
1320 chm++;
1321 getword_start(&gwarea,hora);
1322 while(chm) {
1323 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1324 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1325 exit(EXIT_FAILURE);
1326 }
1327 strncat(hmr,warea,2);
1328 chm--;
1329 }
1330 strncat(hmr,gwarea.current,2);
1331
1332 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1333 }
1334
1335 if(site[0] != '\0'){
1336 if(strstr(url,site)==0) continue;
1337 }
1338
1339 if(UserIp) {
1340 strcpy(user,ip);
1341 id_is_ip=true;
1342 } else {
1343 id_is_ip=false;
1344 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1345 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1346 strcpy(user,ip);
1347 id_is_ip=true;
1348 }
1349 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1350 continue;
1351 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1352 strcpy(user,"everybody");
1353 } else {
1354 strlow(user);
1355 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1356 if((str = strchr(user,'_')) != 0) {
1357 strcpy(warea,str+1);
1358 strcpy(user,warea);
1359 }
1360 if((str = strchr(user,'+')) != 0) {
1361 strcpy(warea,str+1);
1362 strcpy(user,warea);
1363 }
1364 }
1365 }
1366 }
1367
1368 if(us[0] != '\0'){
1369 if(strcmp(user,us)!=0) continue;
1370 }
1371
1372 if(puser) {
1373 sprintf(wuser,":%s:",user);
1374 if(strstr(userfile, wuser) == 0)
1375 continue;
1376 }
1377
1378 if(fuser) {
1379 if(!vuexclude(user)) {
1380 if (debugm) printf(_("Excluded user: %s\n"),user);
1381 totregsx++;
1382 continue;
1383 }
1384 }
1385
1386 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1387 continue;
1388
1389 if(max_elapsed) {
1390 if(atol(elap)>max_elapsed) {
1391 elap[0]='0';
1392 elap[1]='\0';
1393 }
1394 }
1395
1396 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1397 fixendofline(str);
1398 sprintf(smartfilter,"\"%s\"",str+1);
1399 } else sprintf(smartfilter,"\"\"");
1400
1401 nopen=0;
1402 prev_ufile=NULL;
1403 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1404 prev_ufile=ufile;
1405 if (ufile->file) nopen++;
1406 }
1407 if (!ufile) {
1408 ufile=malloc(sizeof(*ufile));
1409 if (!ufile) {
1410 debuga(_("Not enough memory to store the user %s\n"),user);
1411 exit(EXIT_FAILURE);
1412 }
1413 memset(ufile,0,sizeof(*ufile));
1414 ufile->next=first_user_file;
1415 first_user_file=ufile;
1416 uinfo=userinfo_create(user);
1417 ufile->user=uinfo;
1418 uinfo->id_is_ip=id_is_ip;
1419 } else {
1420 if (prev_ufile) {
1421 prev_ufile->next=ufile->next;
1422 ufile->next=first_user_file;
1423 first_user_file=ufile;
1424 }
1425 }
1426
1427 if (ufile->file==NULL) {
1428 if (nopen>=maxopenfiles) {
1429 x=0;
1430 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1431 if (ufile1->file!=NULL) {
1432 if (x>=maxopenfiles) {
1433 if (fclose(ufile1->file)==EOF) {
1434 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1435 exit(EXIT_FAILURE);
1436 }
1437 ufile1->file=NULL;
1438 }
1439 x++;
1440 }
1441 }
1442 }
1443 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1444 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1445 exit(EXIT_FAILURE);
1446 }
1447 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1448 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1449 exit (1);
1450 }
1451 }
1452
1453 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1454 if ( fp_Write_User )
1455 fclose( fp_Write_User ) ;
1456 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1457
1458 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1459 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1460 exit (1);
1461 }
1462 strcpy( sz_Last_User , user ) ;
1463 }*/
1464 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1465 debuga(_("Write error in the log file of user %s\n"),user);
1466 exit(EXIT_FAILURE);
1467 }
1468
1469 if(fp_log && ilf!=ILF_Sarg)
1470 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1471
1472 totregsg++;
1473
1474 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1475 ndownload = 1;
1476
1477 if ( ! fp_Download_Unsort ) {
1478 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1479 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1480 exit (1);
1481 }
1482 }
1483 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1484 }
1485
1486 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1487 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1488 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1489 denied_count++;
1490 }
1491 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1492 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1493 authfail_count++;
1494 }
1495 }
1496
1497 if (ilf!=ILF_Sarg) {
1498 if(!totper || idata<mindate){
1499 mindate=idata;
1500 memcpy(&period.start,t,sizeof(*t));
1501 strcpy(start_hour,tbuf2);
1502 }
1503 if (!totper || idata>maxdate) {
1504 maxdate=idata;
1505 memcpy(&period.end,t,sizeof(*t));
1506 }
1507 totper=true;
1508 }
1509
1510 if(debugm){
1511 printf("IP=\t%s\n",ip);
1512 printf("USER=\t%s\n",user);
1513 printf("ELAP=\t%s\n",elap);
1514 printf("DATE=\t%s\n",dia);
1515 printf("TIME=\t%s\n",hora);
1516 printf("FUNC=\t%s\n",fun);
1517 printf("URL=\t%s\n",url);
1518 printf("CODE=\t%s\n",code);
1519 printf("LEN=\t%s\n",tam);
1520 }
1521 }
1522 if (!from_stdin) {
1523 if (from_pipe)
1524 pclose(fp_in);
1525 else
1526 fclose(fp_in);
1527 if( ShowReadStatistics )
1528 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1529 }
1530 }
1531
1532 if (debug)
1533 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1534
1535 longline_destroy(&line);
1536 if ( fp_Download_Unsort )
1537 fclose (fp_Download_Unsort);
1538
1539 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1540 ufile1=ufile->next;
1541 if (ufile->file!=NULL) fclose(ufile->file);
1542 free(ufile);
1543 }
1544
1545 free_download();
1546 free_excludecodes();
1547 free_exclude();
1548
1549 if(debug) {
1550 int totalcount=0;
1551
1552 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1553
1554 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1555 debuga(_("Log with mixed records format (squid and common log)\n"));
1556
1557 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1558 debuga(_("Common log format\n"));
1559
1560 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1561 debuga(_("Squid log format\n"));
1562
1563 if(ilf_count[ILF_Sarg]>0)
1564 debuga(_("Sarg log format\n"));
1565
1566 if(totalcount==0 && totregsg)
1567 debuga(_("Log with invalid format\n"));
1568 }
1569
1570 if(!totregsg){
1571 debuga(_("No records found\n"));
1572 debuga(_("End\n"));
1573 if(fp_denied) fclose(fp_denied);
1574 if(fp_authfail) fclose(fp_authfail);
1575 userinfo_free();
1576 if(userfile) free(userfile);
1577 close_usertab();
1578 exit(EXIT_SUCCESS);
1579 }
1580
1581 if (date[0]!='\0') {
1582 char date0[30], date1[30];
1583
1584 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1585 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1586 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1587 getperiod_fromrange(&period,dfrom,duntil);
1588 }
1589 if (getperiod_buildtext(&period)<0) {
1590 debuga(_("Failed to build the string representation of the date range\n"));
1591 exit(EXIT_FAILURE);
1592 }
1593
1594 if(debugz){
1595 debugaz("data",dia);
1596 debugaz("period",period.text);
1597 }
1598
1599 if(debug)
1600 debuga(_("Period: %s\n"),period.text);
1601
1602 // fclose(fp_ou);
1603 if(fp_denied)
1604 fclose(fp_denied);
1605 if(fp_authfail)
1606 fclose(fp_authfail);
1607
1608 if(fp_log != NULL) {
1609 fclose(fp_log);
1610 strcpy(end_hour,tbuf2);
1611 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1612 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1613 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1614 if (rename(arq_log,val4)) {
1615 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1616 } else {
1617 strcpy(arq_log,val4);
1618
1619 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1620 /*
1621 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1622 necessary around the command name, put them in the configuration file.
1623 */
1624 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1625 cstatus=system(val1);
1626 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1627 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1628 debuga(_("command: %s\n"),val1);
1629 exit(EXIT_FAILURE);
1630 }
1631 }
1632 }
1633 if(debug)
1634 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1635 }
1636
1637 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1638 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1639 cstatus=system(csort);
1640 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1641 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1642 debuga(_("sort command: %s\n"),csort);
1643 exit(EXIT_FAILURE);
1644 }
1645 unlink(denied_unsort);
1646 }
1647
1648 sort_users_log(tmp, debug);
1649
1650 if(DataFile[0] != '\0')
1651 data_file(tmp);
1652 else
1653 gerarel();
1654
1655 unlink(tmp2);
1656 if((ReportType & REPORT_TYPE_DENIED) != 0)
1657 unlink(denied_sort);
1658
1659 if(strcmp(tmp,"/tmp") != 0) {
1660 unlinkdir(tmp,0);
1661 }
1662
1663 userinfo_free();
1664 if(userfile)
1665 free(userfile);
1666 close_usertab();
1667
1668 if(debug)
1669 debuga(_("End\n"));
1670
1671 exit(EXIT_SUCCESS);
1672
1673 }
1674
1675
1676 static void getusers(const char *pwdfile, int debug)
1677 {
1678
1679 FILE *fp_usr;
1680 char buf[255];
1681 char *str;
1682 long int nreg=0;
1683
1684 if(debug)
1685 debuga(_("Loading password file from %s\n"),pwdfile);
1686
1687 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1688 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1689 exit(EXIT_FAILURE);
1690 }
1691
1692 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1693 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1694 exit(EXIT_FAILURE);
1695 }
1696 nreg = ftell(fp_usr);
1697 if (nreg<0) {
1698 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1699 exit(EXIT_FAILURE);
1700 }
1701 nreg = nreg+5000;
1702 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1703 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1704 exit(EXIT_FAILURE);
1705 }
1706
1707 if((userfile=(char *) malloc(nreg))==NULL){
1708 debuga(_("malloc error (%ld)\n"),nreg);
1709 exit(EXIT_FAILURE);
1710 }
1711
1712 bzero(userfile,nreg);
1713 strcpy(userfile,":");
1714
1715 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1716 str=strchr(buf,':');
1717 if (!str) {
1718 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1719 exit(EXIT_FAILURE);
1720 }
1721 str[1]='\0';
1722 strcat(userfile,buf);
1723 }
1724
1725 fclose(fp_usr);
1726
1727 return;
1728 }