]> git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Accept unlimited line length when converting or splitting the input log.
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef HAVE_GETOPT_H
31 #include <getopt.h>
32 #endif
33
/* Number of input lines main() reads between two refreshes of its
 * "Records in file ... reading: N%" progress message; the countdown
 * variable OutputNonZero is reloaded with this value each time. */
34 #define REPORT_EVERY_X_LINES 5000
/* Value main() assigns to maxopenfiles before it processes the logs.
 * NOTE(review): presumably caps the number of per-user files kept open
 * at once -- confirm against the code that consumes maxopenfiles. */
35 #define MAX_OPEN_USER_FILES 10
36
/*
 * List node that ties a user record to a stdio stream.
 * main() declares several pointers of this type and initialises the
 * list head, first_user_file, to NULL.
 * NOTE(review): presumably one node exists per user whose temporary
 * file is currently open -- confirm against the code that fills the list.
 */
37 struct userfilestruct
38 {
39 struct userfilestruct *next; /* following node of the list */
40 struct userinfostruct *user; /* user record this node refers to */
41 FILE *file; /* stdio stream paired with that user */
42 };
43
/* File-local string pointer, NULL until something assigns it. The
 * "@null@" marker is a static-checker annotation (splint style) stating
 * that the pointer may be NULL. Its users lie outside this excerpt. */
44 /*@null@*/static char *userfile=NULL;
45
/* Default selections: weekdays lists 0..6 and hours lists 0..23.
 * NOTE(review): the trailing 7 / 24 looks like the element count stored
 * in the numlist -- confirm against the numlist type definition. */
46 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
47 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
48 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
49
/* Forward declaration of a file-local helper. main() calls it with
 * PasswdFile and the debug counter once access() reports that
 * PasswdFile is readable. */
50 static void getusers(const char *pwdfile, int debug);
51
52 int main(int argc,char *argv[])
53 {
54 enum isa_col_id {
55 ISACOL_Ip,
56 ISACOL_UserName,
57 ISACOL_Date,
58 ISACOL_Time,
59 ISACOL_TimeTaken,
60 ISACOL_Bytes,
61 ISACOL_Uri,
62 ISACOL_Status,
63 ISACOL_Last //last entry of the list !
64 };
65 enum InputLogFormat {
66 ILF_Unknown,
67 ILF_Squid,
68 ILF_Common,
69 ILF_Sarg,
70 ILF_Isa,
71 ILF_Last //last entry of the list !
72 };
73
74 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
75
76 char sz_Download_Unsort[ 20000 ] ;
77 FILE * fp_Download_Unsort = NULL ;
78
79 extern int optind;
80 extern int optopt;
81 extern char *optarg;
82
83 char data[255];
84 char elap[255];
85 char ip[MAXLEN];
86 char tam[255];
87 char fun[MAXLEN];
88 char wuser[MAXLEN];
89 char smartfilter[MAXLEN];
90 char dia[128];
91 char mes[30];
92 char ano[30];
93 char hora[30];
94 char wtemp[MAXLEN];
95 char wtemp2[255];
96 char date[255];
97 char arq[255];
98 char arq_log[255];
99 char hm[15], hmf[15], hmr[15];
100 int chm=0;
101 char uagent[MAXLEN];
102 char hexclude[MAXLEN];
103 char csort[MAXLEN];
104 int cstatus;
105 char tbuf2[128];
106 char zip[20];
107 char *str;
108 char tmp2[MAXLEN];
109 char tmp3[MAXLEN];
110 char denied_unsort[MAXLEN];
111 char denied_sort[MAXLEN];
112 char authfail_unsort[MAXLEN];
113 char start_hour[128];
114 char end_hour[128];
115 char *linebuf;
116 char hostname[512];
117 char *url;
118 char *urly;
119 char user[MAX_USER_LEN];
120 enum InputLogFormat ilf;
121 int ilf_count[ILF_Last];
122 int ch;
123 int x;
124 int errflg=0;
125 int puser=0;
126 bool fhost=false;
127 bool dns=false;
128 bool fuser=false;
129 int idata=0;
130 int mindate=0;
131 int maxdate=0;
132 int iarq=0;
133 int isa_ncols=0,isa_cols[ISACOL_Last];
134 bool from_stdin;
135 bool from_pipe;
136 int blen;
137 int maxopenfiles;
138 int nopen;
139 bool id_is_ip;
140 long totregsl=0;
141 long totregsg=0;
142 long totregsx=0;
143 bool totper=false;
144 long int max_elapsed=0;
145 long long int iyear, imonth, iday;
146 bool realt;
147 bool userip;
148 struct tm tt;
149 struct tm *t;
150 unsigned long recs1=0UL;
151 unsigned long recs2=0UL;
152 int OutputNonZero = REPORT_EVERY_X_LINES ;
153 bool download_flag=false;
154 char *download_url=NULL;
155 struct getwordstruct gwarea;
156 longline line;
157 time_t tnum;
158 struct stat logstat;
159 struct userinfostruct *uinfo;
160 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
161 static int split=0;
162 static int convert=0;
163 int option_index;
164 static struct option long_options[]=
165 {
166 {"convert",no_argument,&convert,1},
167 {"split",no_argument,&split,1},
168 {0,0,0,0}
169 };
170
171 #ifdef HAVE_LOCALE_H
172 setlocale(LC_TIME,"");
173 #endif
174
175 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
176 if (!setlocale (LC_ALL, "")) {
177 fprintf(stderr,"SARG: Cannot set the locale LC_ALL to the environment variable\n");
178 exit(EXIT_FAILURE);
179 }
180 if (!bindtextdomain (PACKAGE_NAME, LOCALEDIR)) {
181 fprintf(stderr,"SARG: Cannot bind to text domain %s in directory %s (%s)\n",PACKAGE_NAME,LOCALEDIR,strerror(errno));
182 exit(EXIT_FAILURE);
183 }
184 if (!textdomain (PACKAGE_NAME)) {
185 fprintf(stderr,"SARG: Cannot set gettext domain for %s PACKAGE_NAME (%s)\n",PACKAGE_NAME,strerror(errno));
186 exit(EXIT_FAILURE);
187 }
188 #endif //ENABLE_NLS
189
190 BgImage[0]='\0';
191 LogoImage[0]='\0';
192 LogoText[0]='\0';
193 PasswdFile[0]='\0';
194 OutputEmail[0]='\0';
195 UserAgentLog[0]='\0';
196 ExcludeHosts[0]='\0';
197 ExcludeUsers[0]='\0';
198 ConfigFile[0]='\0';
199 code[0]='\0';
200 LastLog=0;
201 ReportType=0UL;
202 UserTabFile[0]='\0';
203 BlockIt[0]='\0';
204 ExternalCSSFile[0]='\0';
205 RedirectorLogFormat[0]='\0';
206 NRedirectorLogs=0;
207 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
208
209 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
210 strcpy(GraphDaysBytesBarColor,"orange");
211 strcpy(BgColor,"#ffffff");
212 strcpy(TxColor,"#000000");
213 strcpy(TxBgColor,"lavender");
214 strcpy(TiColor,"darkblue");
215 strcpy(Width,"80");
216 strcpy(Height,"45");
217 strcpy(LogoTextColor,"#000000");
218 strcpy(HeaderColor,"darkblue");
219 strcpy(HeaderBgColor,"#dddddd");
220 strcpy(LogoTextColor,"#006699");
221 strcpy(FontSize,"9px");
222 strcpy(TempDir,"/tmp");
223 strcpy(OutputDir,"/var/www/html/squid-reports");
224 Ip2Name=false;
225 strcpy(DateFormat,"u");
226 OverwriteReport=false;
227 RemoveTempFiles=true;
228 strcpy(ReplaceIndex,"index.html");
229 Index=INDEX_YES;
230 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
231 UseComma=0;
232 strcpy(MailUtility,"mailx");
233 TopSitesNum=100;
234 TopUsersNum=0;
235 UserIp=0;
236 strcpy(TopuserSortField,"BYTES");
237 strcpy(UserSortField,"BYTES");
238 strcpy(TopuserSortOrder,"reverse");
239 strcpy(UserSortOrder,"reverse");
240 strcpy(TopsitesSortField,"CONNECT");
241 strcpy(TopsitesSortType,"D");
242 LongUrl=0;
243 strcpy(FontFace,"Verdana,Tahoma,Arial");
244 datetimeby=DATETIME_BYTE;
245 strcpy(CharSet,"ISO-8859-1");
246 Privacy=0;
247 strcpy(PrivacyString,"***.***.***.***");
248 strcpy(PrivacyStringColor,"blue");
249 SuccessfulMsg=true;
250 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
251 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
252 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
253 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
254 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
255 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
256 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
257 strcpy(DataFileDelimiter,";");
258 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
259 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
260 ShowReadStatistics=true;
261 strcpy(IndexSortOrder,"D");
262 ShowSargInfo=true;
263 ShowSargLogo=true;
264 strcpy(ParsedOutputLog,"no");
265 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
266 DisplayedValues=DISPLAY_ABBREV;
267 strcpy(HeaderFontSize,"9px");
268 strcpy(TitleFontSize,"11px");
269 strcpy(AuthUserTemplateFile,"sarg_htaccess");
270 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
271 Graphs=true;
272 #if defined(FONTDIR)
273 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
274 #else
275 GraphFont[0]='\0';
276 #endif
277 strcpy(Ulimit,"20000");
278 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
279 IndexTree=INDEX_TREE_FILE;
280 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
281 RealtimeUnauthRec=REALTIME_UNAUTH_REC_SHOW;
282 RedirectorIgnoreDate=false;
283 DansguardianIgnoreDate=false;
284 DataFileUrl=DATAFILEURL_IP;
285 strcpy(MaxElapsed,"28800000");
286 BytesInSitesUsersReport=0;
287 UserAuthentication=0;
288 strcpy(LDAPHost,"127.0.0.1");
289 LDAPPort=389;
290 LDAPProtocolVersion=3;
291 LDAPBindDN[0]='\0';
292 LDAPBindPW[0]='\0';
293 LDAPBaseSearch[0]='\0';
294 strcpy(LDAPFilterSearch, "uid=%s");
295 strcpy(LDAPTargetAttr, "cn");
296
297 dia[0]='\0';
298 mes[0]='\0';
299 ano[0]='\0';
300 hora[0]='\0';
301 tmp[0]='\0';
302 tmp2[0]='\0';
303 tmp3[0]='\0';
304 wtemp[0]='\0';
305 wtemp2[0]='\0';
306 us[0]='\0';
307 date[0]='\0';
308 df[0]='\0';
309 uagent[0]='\0';
310 hexclude[0]='\0';
311 addr[0]='\0';
312 hm[0]='\0';
313 hmf[0]='\0';
314 site[0]='\0';
315 outdir[0]='\0';
316 elap[0]='\0';
317 email[0]='\0';
318 zip[0]='\0';
319 UserInvalidChar[0]='\0';
320 DataFile[0]='\0';
321 SquidGuardConf[0]='\0';
322 DansGuardianConf[0]='\0';
323 start_hour[0]='\0';
324 end_hour[0]='\0';
325
326 denied_count=0;
327 download_count=0;
328 authfail_count=0;
329 dansguardian_count=0;
330 squidguard_count=0;
331 useragent_count=0;
332 DeniedReportLimit=10;
333 AuthfailReportLimit=10;
334 DansGuardianReportLimit=10;
335 SquidGuardReportLimit=10;
336 DownloadReportLimit=50;
337 UserReportLimit=0;
338 debug=0;
339 debugz=0;
340 debugm=0;
341 iprel=false;
342 userip=false;
343 realt=false;
344 realtime_refresh=3;
345 realtime_access_log_lines=1000;
346 cost=0.01;
347 nocost=50000000;
348 ndownload=0;
349 squid24=false;
350 dfrom=0;
351 duntil=0;
352
353 bzero(IncludeUsers, sizeof(IncludeUsers));
354 bzero(ExcludeString, sizeof(ExcludeString));
355 first_user_file=NULL;
356 memset(&period,0,sizeof(period));
357
358 NAccessLog=0;
359 for(x=0; x<MAXLOGS; x++)
360 AccessLog[x][0]='\0';
361 AccessLogFromCmdLine=0;
362 RedirectorLogFromCmdLine=0;
363
364 strcpy(Title,_("Squid User Access Report"));
365
366 while((ch = getopt_long_only(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz",long_options,&option_index)) != -1){
367 switch(ch)
368 {
369 case 0:
370 break;
371 case 'a':
372 strcpy(addr,optarg);
373 break;
374 case 'b':
375 strcpy(uagent,optarg);
376 break;
377 case 'c':
378 strcpy(hexclude,optarg);
379 break;
380 case 'd':
381 strncpy(date,optarg,sizeof(date)-1);
382 date[sizeof(date)-1]='\0';
383 date_from(date, &dfrom, &duntil);
384 break;
385 case 'e':
386 strcpy(email,optarg);
387 break;
388 case 'f':
389 strcpy(ConfigFile,optarg);
390 break;
391 case 'g':
392 strcpy(df,optarg);
393 break;
394 case 'h':
395 usage(argv[0]);
396 exit(EXIT_SUCCESS);
397 case 'i':
398 iprel=true;
399 break;
400 case 'l':
401 if (NAccessLog>=MAXLOGS) {
402 debuga(_("Too many log files passed on command line with option -l.\n"));
403 exit(EXIT_FAILURE);
404 }
405 if (strlen(optarg)>=MAX_LOG_FILELEN) {
406 debuga(_("Log file name too long passed on command line with option -l: %s\n"),optarg);
407 exit(EXIT_FAILURE);
408 }
409 strcpy(AccessLog[NAccessLog],optarg);
410 NAccessLog++;
411 AccessLogFromCmdLine++;
412 break;
413 case 'L':
414 if (NRedirectorLogs>MAX_REDIRECTOR_LOGS) {
415 debuga(_("Too many redirector logs passed on command line with option -L.\n"));
416 exit(EXIT_FAILURE);
417 }
418 if (strlen(optarg)>=MAX_REDIRECTOR_FILELEN) {
419 debuga(_("Redirector log file name too long passed on command line with opton -L: %s\n"),optarg);
420 exit(EXIT_FAILURE);
421 }
422 strcpy(RedirectorLogs[NRedirectorLogs],optarg);
423 NRedirectorLogs++;
424 RedirectorLogFromCmdLine++;
425 break;
426 case 'm':
427 debugm++;
428 break;
429 case 'n':
430 dns=true;
431 break;
432 case 'o':
433 strcpy(outdir,optarg);
434 break;
435 case 'p':
436 userip=true;
437 break;
438 case 'r':
439 realt=true;
440 break;
441 case 's':
442 strcpy(site,optarg);
443 break;
444 case 't':
445 {
446 int h,m;
447
448 if(strstr(optarg,"-") == 0) {
449 strcpy(hm,optarg);
450 strcpy(hmf,optarg);
451 } else {
452 getword_start(&gwarea,optarg);
453 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,'\0')<0) {
454 debuga(_("The time range passed on the command line with option -t is invalid\n"));
455 exit(EXIT_FAILURE);
456 }
457 }
458 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
459 debuga(_("Time period must be MM or MM:SS. Exit\n"));
460 exit(EXIT_FAILURE);
461 }
462 sprintf(hm,"%02d%02d",h,m);
463 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
464 debuga(_("Time period must be MM or MM:SS. Exit\n"));
465 exit(EXIT_FAILURE);
466 }
467 sprintf(hmf,"%02d%02d",h,m);
468 break;
469 }
470 case 'u':
471 strcpy(us,optarg);
472 break;
473 case 'v':
474 version();
475 break;
476 case 'w':
477 strcpy(tmp,optarg);
478 break;
479 case 'x':
480 debug++;
481 break;
482 case 'y':
483 langcode++;
484 break;
485 case 'z':
486 debugz++;
487 break;
488 /*case ':':
489 debuga(_("Option -%c require an argument\n"),optopt);
490 errflg++;
491 break;*/
492 case '?':
493 usage(argv[0]);
494 exit(EXIT_FAILURE);
495 default:
496 abort();
497 }
498 }
499
500 if (errflg>0) {
501 usage(argv[0]);
502 exit(2);
503 }
504
505 if(debug) debuga(_("Init\n"));
506
507 if(ConfigFile[0] == '\0') snprintf(ConfigFile,sizeof(ConfigFile),"%s/sarg.conf",SYSCONFDIR);
508 if(access(ConfigFile, R_OK) != 0) {
509 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
510 exit(EXIT_FAILURE);
511 }
512
513 if(access(ConfigFile, R_OK) == 0)
514 getconf();
515
516 if(userip) UserIp=true;
517
518 if(dns) Ip2Name=true;
519
520 if(realt) {
521 realtime();
522 exit(EXIT_SUCCESS);
523 }
524
525 if(IndexTree == INDEX_TREE_FILE)
526 strcpy(ImageFile,"../images");
527 else
528 strcpy(ImageFile,"../../../images");
529
530 dataonly=0;
531 if(DataFile[0] != '\0')
532 dataonly++;
533
534 if(NAccessLog == 0) {
535 strcpy(AccessLog[0],"/var/log/squid/access.log");
536 NAccessLog++;
537 }
538
539 if(split) {
540 splitlog(AccessLog[0], df, dfrom, duntil, convert);
541 exit(EXIT_SUCCESS);
542 }
543 if(convert) {
544 convlog(AccessLog[0], df, dfrom, duntil);
545 exit(EXIT_SUCCESS);
546 }
547
548 load_excludecodes(ExcludeCodes);
549
550 if(access(PasswdFile, R_OK) == 0) {
551 getusers(PasswdFile,debug);
552 puser++;
553 }
554
555 if(hexclude[0] == '\0')
556 strcpy(hexclude,ExcludeHosts);
557 if(hexclude[0] != '\0') {
558 gethexclude(hexclude,debug);
559 fhost=true;
560 }
561
562 if(ReportType == 0) {
563 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
564 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
565 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
566 }
567
568 if(access(ExcludeUsers, R_OK) == 0) {
569 getuexclude(ExcludeUsers,debug);
570 fuser=true;
571 }
572
573 indexonly=0;
574 if(fuser) {
575 if(is_indexonly())
576 indexonly++;
577 }
578 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
579 if(Index == INDEX_ONLY) indexonly++;
580
581 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
582
583 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
584 strcat(outdir,"/");
585
586 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
587
588 if(tmp[0] == '\0') strcpy(tmp,TempDir);
589 else strcpy(TempDir,tmp);
590
591 if(df[0] == '\0') strcpy(df,DateFormat);
592 else strcpy(DateFormat,df);
593
594 if(df[0] == '\0') {
595 strcpy(df,"u");
596 strcpy(DateFormat,"u");
597 }
598 if (df[0]=='w')
599 IndexTree=INDEX_TREE_FILE;
600
601 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
602
603 strcpy(tmp2,tmp);
604
605 if(email[0] != '\0') {
606 snprintf(wtemp2,sizeof(wtemp2),"%s/sarg",tmp2);
607 my_mkdir(wtemp2);
608 strcat(tmp2,"/sarg");
609 strcpy(outdir,tmp2);
610 strcat(outdir,"/");
611 }
612
613 strcat(tmp2,"/sarg.log");
614
615 sprintf(tmp3,"%s/sarg",tmp);
616 if(access(tmp3, R_OK) == 0) {
617 unlinkdir(tmp3,1);
618 }
619 my_mkdir(tmp3);
620 strcpy(denied_unsort,tmp3);
621 strcpy(denied_sort,tmp3);
622 strcpy(authfail_unsort,tmp3);
623 strcat(denied_unsort,"/denied.log.unsort");
624 strcat(denied_sort,"/denied.log");
625 strcat(authfail_unsort,"/authfail.log.unsort");
626
627 if(debug) {
628 debuga(_("Parameters:\n"));
629 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
630 debuga(_(" Useragent log (-b) = %s\n"),uagent);
631 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
632 debuga(_(" Date from-until (-d) = %s\n"),date);
633 debuga(_(" Email address to send reports (-e) = %s\n"),email);
634 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
635 if(strcmp(df,"e") == 0)
636 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
637 if(strcmp(df,"u") == 0)
638 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
639 if(strcmp(df,"w") == 0)
640 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
641 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
642 for (iarq=0 ; iarq<NAccessLog ; iarq++)
643 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
644 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
645 debuga(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
646 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
647 debuga(_(" Output dir (-o) = %s\n"),outdir);
648 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
649 debuga(_(" Accessed site (-s) = %s\n"),site);
650 debuga(_(" Time (-t) = %s\n"),hm);
651 debuga(_(" User (-u) = %s\n"),us);
652 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
653 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
654 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
655 debuga("\n");
656 }
657
658 if(debugm) {
659 printf(_("Parameters:\n"));
660 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
661 printf(_(" Useragent log (-b) = %s\n"),uagent);
662 printf(_(" Exclude file (-c) = %s\n"),hexclude);
663 printf(_(" Date from-until (-d) = %s\n"),date);
664 printf(_(" Email address to send reports (-e) = %s\n"),email);
665 printf(_(" Config file (-f) = %s\n"),ConfigFile);
666 if(strcmp(df,"e") == 0)
667 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
668 if(strcmp(df,"u") == 0)
669 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
670 if(strcmp(df,"w") == 0)
671 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
672 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
673 for (iarq=0 ; iarq<NAccessLog ; iarq++)
674 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
675 for (iarq=0 ; iarq<NRedirectorLogs ; iarq++)
676 printf(_(" Redirector log (-L) = %s\n"),RedirectorLogs[iarq]);
677 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
678 printf(_(" Output dir (-o) = %s\n"),outdir);
679 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
680 printf(_(" Accessed site (-s) = %s\n"),site);
681 printf(_(" Time (-t) = %s\n"),hm);
682 printf(_(" User (-u) = %s\n"),us);
683 printf(_(" Temporary dir (-w) = %s\n"),tmp);
684 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
685 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
686 printf(_("sarg version: %s\n"),VERSION);
687 }
688
689 if(debug)
690 debuga(_("sarg version: %s\n"),VERSION);
691
692 maxopenfiles=MAX_OPEN_USER_FILES;
693 #ifdef HAVE_RLIM_T
694 if (Ulimit[0] != '\0') {
695 struct rlimit rl;
696 long l1, l2;
697 int rc=0;
698
699 #if defined(RLIMIT_NOFILE)
700 getrlimit (RLIMIT_NOFILE, &rl);
701 #elif defined(RLIMIT_OFILE)
702 getrlimit (RLIMIT_OFILE, &rl);
703 #else
704 #warning "No rlimit resource for the number of open files"
705 #endif
706 l1 = rl.rlim_cur;
707 l2 = rl.rlim_max;
708
709 rl.rlim_cur = atol(Ulimit);
710 rl.rlim_max = atol(Ulimit);
711 #if defined(RLIMIT_NOFILE)
712 rc=setrlimit (RLIMIT_NOFILE, &rl);
713 #elif defined(RLIMIT_OFILE)
714 rc=setrlimit (RLIMIT_OFILE, &rl);
715 #else
716 #warning "No rlimit resource for the number of open files"
717 #endif
718 if(rc == -1) {
719 debuga(_("setrlimit error - %s\n"),strerror(errno));
720 }
721
722 if(debug)
723 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
724 }
725 #endif
726
727 init_usertab(UserTabFile);
728
729 if ((line=longline_create())==NULL) {
730 debuga(_("Not enough memory to read a log file\n"));
731 exit(EXIT_FAILURE);
732 }
733
734 snprintf(sz_Download_Unsort,sizeof(sz_Download_Unsort),"%s/sarg/download.unsort", tmp);
735
736 if(DataFile[0]=='\0') {
737 if((ReportType & REPORT_TYPE_DENIED) != 0) {
738 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
739 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
740 exit(EXIT_FAILURE);
741 }
742 }
743
744 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
745 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
746 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
747 exit(EXIT_FAILURE);
748 }
749 }
750 }
751
752 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
753 strcpy(arq,AccessLog[iarq]);
754
755 strcpy(arqtt,arq);
756
757 if(strcmp(arq,"-")==0) {
758 if(debug)
759 debuga(_("Reading access log file: from stdin\n"));
760 fp_in=stdin;
761 from_stdin=true;
762 } else {
763 if (date[0]!='\0') {
764 if (stat(arq,&logstat)!=0) {
765 debuga(_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
766 } else {
767 struct tm *logtime=localtime(&logstat.st_mtime);
768 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<dfrom) {
769 debuga(_("Ignoring old log file %s\n"),arq);
770 continue;
771 }
772 }
773 }
774 fp_in=decomp(arq,&from_pipe);
775 if(fp_in==NULL) {
776 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
777 exit(EXIT_FAILURE);
778 }
779 if(debug) debuga(_("Reading access log file: %s\n"),arq);
780 from_stdin=false;
781 }
782 ilf=ILF_Unknown;
783 download_flag=false;
784 // pre-read the file only if we have to show stats
785 if(ShowReadStatistics && !from_stdin && !from_pipe) {
786 size_t nread,i;
787 bool skipcr=false;
788 char tmp4[MAXLEN];
789
790 recs1=0UL;
791 recs2=0UL;
792
793 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
794 for (i=0 ; i<nread ; i++)
795 if (skipcr) {
796 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
797 skipcr=false;
798 }
799 } else {
800 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
801 skipcr=true;
802 recs1++;
803 }
804 }
805 }
806 rewind(fp_in);
807 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
808 putchar('\r');
809 fflush( stdout ) ;
810 }
811
812 longline_reset(line);
813
814 while ((linebuf=longline_read(fp_in,line))!=NULL) {
815 blen=strlen(linebuf);
816
817 if (ilf==ILF_Unknown) {
818 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
819 fixendofline(linebuf);
820 if (debug)
821 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
822 ilf=ILF_Isa;
823 ilf_count[ilf]++;
824 continue;
825 }
826
827 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
828 if (getperiod_fromsarglog(arqtt,&period)<0) {
829 debuga(_("The name of the file is invalid: %s\n"),arq);
830 exit(EXIT_FAILURE);
831 }
832 ilf=ILF_Sarg;
833 ilf_count[ilf]++;
834 continue;
835 }
836 }
837
838 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
839 if(access(ParsedOutputLog,R_OK) != 0) {
840 my_mkdir(ParsedOutputLog);
841 }
842 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
843 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
844 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
845 exit(EXIT_FAILURE);
846 }
847 fputs("*** SARG Log ***\n",fp_log);
848 }
849
850 recs2++;
851 if( ShowReadStatistics && !from_stdin && --OutputNonZero<=0) {
852 double perc = recs2 * 100. / recs1 ;
853 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs1,perc);
854 putchar('\r');
855 fflush (stdout);
856 OutputNonZero = REPORT_EVERY_X_LINES ;
857 }
858 if(blen < 58) continue;
859 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
860 if(strstr(linebuf,"logfile turned over") != 0) continue;
861 if(linebuf[0] == ' ') continue;
862
863 // exclude_string
864 if(ExcludeString[0] != '\0') {
865 bool exstring=false;
866 getword_start(&gwarea,ExcludeString);
867 while(strchr(gwarea.current,':') != 0) {
868 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
869 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
870 exit(EXIT_FAILURE);
871 }
872 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
873 exstring=true;
874 break;
875 }
876 }
877 if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
878 exstring=true;
879 if(exstring) continue;
880 }
881
882 totregsl++;
883 if(debugm)
884 printf("BUF=%s\n",linebuf);
885
886 t=NULL;
887 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
888 getword_start(&gwarea,linebuf);
889 if (getword(data,sizeof(data),&gwarea,' ')<0) {
890 debuga(_("Maybe you have a broken time in your access.log file\n"));
891 exit(EXIT_FAILURE);
892 }
893 if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
894 strcpy(ip,data);
895 strcpy(elap,"0");
896 if(squid24) {
897 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
898 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
899 exit(EXIT_FAILURE);
900 }
901 } else {
902 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
903 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
904 exit(EXIT_FAILURE);
905 }
906 }
907 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
908 getword(fun,sizeof(fun),&gwarea,' ')<0) {
909 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
910 exit(EXIT_FAILURE);
911 }
912 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
913 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
914 exit(EXIT_FAILURE);
915 }
916 if (getword_skip(MAXLEN,&gwarea,' ')<0) {
917 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
918 exit(EXIT_FAILURE);
919 }
920 if (getword(code2,sizeof(code2),&gwarea,' ')<0) {
921 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
922 exit(EXIT_FAILURE);
923 }
924 if (getword(tam,sizeof(tam),&gwarea,' ')<0) {
925 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
926 exit(EXIT_FAILURE);
927 }
928 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
929 if (getword(code,sizeof(code),&gwarea,' ')<0) {
930 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
931 exit(EXIT_FAILURE);
932 }
933 } else {
934 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
935 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
936 exit(EXIT_FAILURE);
937 }
938 }
939
940 if ((str = strchr(code, ':')) != NULL)
941 *str = '/';
942
943 if(strcmp(tam,"\0") == 0)
944 strcpy(tam,"0");
945
946 ilf=ILF_Common;
947 ilf_count[ilf]++;
948
949 getword_start(&gwarea,data+1);
950 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
951 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
952 exit(EXIT_FAILURE);
953 }
954 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
955 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
956 exit(EXIT_FAILURE);
957 }
958 getword_start(&gwarea,data);
959 if (getword_atoll(&iday,&gwarea,'/')<0){
960 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
961 exit(EXIT_FAILURE);
962 }
963 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
964 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
965 exit(EXIT_FAILURE);
966 }
967 if (getword_atoll(&iyear,&gwarea,'/')<0){
968 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
969 exit(EXIT_FAILURE);
970 }
971
972 imonth=month2num(mes)+1;
973 idata=builddia(iday,imonth,iyear);
974 computedate(iyear,imonth,iday,&tt);
975 t=&tt;
976 }
977
978 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
979 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
980 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
981 exit(EXIT_FAILURE);
982 }
983 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
984 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
985 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
986 exit(EXIT_FAILURE);
987 }
988 if(strlen(elap) < 1) continue;
989 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
990 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
991 exit(EXIT_FAILURE);
992 }
993 if (getword(code,sizeof(code),&gwarea,' ')<0){
994 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
995 exit(EXIT_FAILURE);
996 }
997 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
998 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
999 exit(EXIT_FAILURE);
1000 }
1001 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
1002 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
1003 exit(EXIT_FAILURE);
1004 }
1005 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
1006 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
1007 exit(EXIT_FAILURE);
1008 }
1009 if (getword(user,sizeof(user),&gwarea,' ')<0){
1010 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1011 exit(EXIT_FAILURE);
1012 }
1013 ilf=ILF_Squid;
1014 ilf_count[ilf]++;
1015
1016 tnum=atoi(data);
1017 t=localtime(&tnum);
1018 if (t == NULL) {
1019 debuga(_("Cannot convert the timestamp from the squid log file\n"));
1020 exit(EXIT_FAILURE);
1021 }
1022
1023 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1024
1025 idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday;
1026 }
1027 }
1028 if (ilf==ILF_Sarg) {
1029 getword_start(&gwarea,linebuf);
1030 if (getword(data,sizeof(data),&gwarea,'\t')<0){
1031 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1032 exit(EXIT_FAILURE);
1033 }
1034 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
1035 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1036 exit(EXIT_FAILURE);
1037 }
1038 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
1039 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1040 exit(EXIT_FAILURE);
1041 }
1042 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
1043 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1044 exit(EXIT_FAILURE);
1045 }
1046 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
1047 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1048 exit(EXIT_FAILURE);
1049 }
1050 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
1051 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1052 exit(EXIT_FAILURE);
1053 }
1054 if (getword(code,sizeof(code),&gwarea,'\t')<0){
1055 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1056 exit(EXIT_FAILURE);
1057 }
1058 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
1059 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1060 exit(EXIT_FAILURE);
1061 }
1062 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
1063 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1064 exit(EXIT_FAILURE);
1065 }
1066 getword_start(&gwarea,data);
1067 if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){
1068 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1069 exit(EXIT_FAILURE);
1070 }
1071 if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){
1072 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1073 exit(EXIT_FAILURE);
1074 }
1075 if (getword_atoll(&iyear,&gwarea,'\0')<0){
1076 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1077 exit(EXIT_FAILURE);
1078 }
1079 idata=builddia(iday,imonth,iyear);
1080 computedate(iyear,imonth,iday,&tt);
1081 t=&tt;
1082 }
1083 if (ilf==ILF_Isa) {
1084 if (linebuf[0] == '#') {
1085 int ncols,cols[ISACOL_Last];
1086
1087 fixendofline(linebuf);
1088 getword_start(&gwarea,linebuf);
1089 // remove the #Fields: column at the beginning of the line
1090 if (getword_skip(1000,&gwarea,' ')<0){
1091 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1092 exit(EXIT_FAILURE);
1093 }
1094 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1095 ncols=0;
1096 while(gwarea.current[0] != '\0') {
1097 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1098 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1099 exit(EXIT_FAILURE);
1100 }
1101 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1102 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1103 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1104 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1105 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1106 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1107 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1108 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1109 ncols++;
1110 }
1111 if (cols[ISACOL_Ip]>=0) {
1112 isa_ncols=ncols;
1113 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1114 isa_cols[ncols]=cols[ncols];
1115 }
1116 continue;
1117 }
1118 if (!isa_ncols) continue;
1119 getword_start(&gwarea,linebuf);
1120 for (x=0 ; x<isa_ncols ; x++) {
1121 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1122 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1123 exit(EXIT_FAILURE);
1124 }
1125 if (x==isa_cols[ISACOL_Ip]) {
1126 if (strlen(str)>=sizeof(ip)) {
1127 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1128 exit(EXIT_FAILURE);
1129 }
1130 strcpy(ip,str);
1131 } else if (x==isa_cols[ISACOL_UserName]) {
1132 if (strlen(str)>=sizeof(user)) {
1133 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1134 exit(EXIT_FAILURE);
1135 }
1136 strcpy(user,str);
1137 } else if (x==isa_cols[ISACOL_Date]) {
1138 if (strlen(str)>=sizeof(data)) {
1139 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1140 exit(EXIT_FAILURE);
1141 }
1142 strcpy(data,str);
1143 } else if (x==isa_cols[ISACOL_Time]) {
1144 if (strlen(str)>=sizeof(hora)) {
1145 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1146 exit(EXIT_FAILURE);
1147 }
1148 strcpy(hora,str);
1149 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1150 if (strlen(str)>=sizeof(elap)) {
1151 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1152 exit(EXIT_FAILURE);
1153 }
1154 strcpy(elap,str);
1155 } else if (x==isa_cols[ISACOL_Bytes]) {
1156 if (strlen(str)>=sizeof(tam)) {
1157 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1158 exit(EXIT_FAILURE);
1159 }
1160 strcpy(tam,str);
1161 } else if (x==isa_cols[ISACOL_Uri]) {
1162 url=str;
1163 } else if (x==isa_cols[ISACOL_Status]) {
1164 if (strlen(str)>=sizeof(code)) {
1165 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1166 exit(EXIT_FAILURE);
1167 }
1168 strcpy(code,str);
1169 }
1170 }
1171
1172 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1173 sprintf(val1,"DENIED/%s",code);
1174 strcpy(code,val1);
1175 }
1176 getword_start(&gwarea,data);
1177 if (getword_atoll(&iyear,&gwarea,'-')<0){
1178 debuga(_("Maybe you have a broken year in your %s file\n"),arq);
1179 exit(EXIT_FAILURE);
1180 }
1181 if (getword_atoll(&imonth,&gwarea,'-')<0){
1182 debuga(_("Maybe you have a broken month in your %s file\n"),arq);
1183 exit(EXIT_FAILURE);
1184 }
1185 if (getword_atoll(&iday,&gwarea,'\0')<0){
1186 debuga(_("Maybe you have a broken day in your %s file\n"),arq);
1187 exit(EXIT_FAILURE);
1188 }
1189
1190 idata=builddia(iday,imonth,iyear);
1191 computedate(iyear,imonth,iday,&tt);
1192 t=&tt;
1193 }
1194 if (t==NULL) {
1195 debuga(_("Unknown input log file format\n"));
1196 break;
1197 }
1198
1199 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1200 snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1201
1202 if(debugm)
1203 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1204
1205 if(date[0] != '\0'){
1206 if(idata < dfrom || idata > duntil) continue;
1207 }
1208
1209 // Record only hours usage which is required
1210 if (t) {
1211 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1212 sizeof( int ), compar ) == NULL )
1213 continue;
1214
1215 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1216 sizeof( int ), compar ) == NULL )
1217 continue;
1218 }
1219
1220
1221 if(strlen(user) > MAX_USER_LEN) {
1222 if (debugm) printf(_("User ID too long: %s\n"),user);
1223 totregsx++;
1224 continue;
1225 }
1226
1227 // include_users
1228 if(IncludeUsers[0] != '\0') {
1229 sprintf(val1,":%s:",user);
1230 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1231 continue;
1232 }
1233
1234 if(vercode(code)) {
1235 if (debugm) printf(_("Excluded code: %s\n"),code);
1236 totregsx++;
1237 continue;
1238 }
1239
1240 if(testvaliduserchar(user))
1241 continue;
1242
1243 #if 0
1244 if((str = strstr(user,"%20")) != NULL) {
1245 /*
1246 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1247 the side effect is to truncate the name at the first space and merge the reports
1248 of people whose name is identical up to the first space.
1249
1250 The old code used to truncate the user name at the first % if a %20 was
1251 found anywhere in the string. That means the string could be truncated
1252 at the wrong place if another % occured before the %20. This new code should
1253 avoid that problem and only truncate at the space. There is no bug
1254 report indicating that anybody noticed this.
1255 */
1256 *str='\0';
1257 }
1258
1259 /*
1260 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1261 found in the user name.
1262 */
1263 while((str = strstr(user,"%5c")) != NULL) {
1264 *str='.';
1265 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1266 }
1267 #endif
1268
1269 urly=url;
1270
1271 if(ilf!=ILF_Sarg) {
1272 /*
1273 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1274 a downloaded file.
1275 */
1276 download_flag=is_download_suffix(url);
1277 if (download_flag) {
1278 download_url=url;
1279 download_count++;
1280 }
1281 } else
1282 download_flag=false;
1283
1284 // remove any protocol:// at the beginning of the URL
1285 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1286 int i;
1287
1288 str+=2;
1289 for (i=0 ; str[i] ; i++)
1290 url[i]=str[i];
1291 url[i]='\0';
1292 }
1293
1294 if(!LongUrl) {
1295 url_hostname(url,hostname,sizeof(hostname));
1296 url=hostname;
1297 }
1298
1299 if(url[0] == '\0') continue;
1300
1301 if(addr[0] != '\0'){
1302 if(strcmp(addr,ip)!=0) continue;
1303 }
1304 if(fhost) {
1305 if(!vhexclude(url)) {
1306 if (debugm) printf(_("Excluded site: %s\n"),url);
1307 totregsx++;
1308 continue;
1309 }
1310 }
1311
1312 if(hm[0] != '\0') {
1313 hmr[0]='\0';
1314 chm++;
1315 getword_start(&gwarea,hora);
1316 while(chm) {
1317 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1318 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1319 exit(EXIT_FAILURE);
1320 }
1321 strncat(hmr,warea,2);
1322 chm--;
1323 }
1324 strncat(hmr,gwarea.current,2);
1325
1326 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1327 }
1328
1329 if(site[0] != '\0'){
1330 if(strstr(url,site)==0) continue;
1331 }
1332
1333 if(UserIp) {
1334 strcpy(user,ip);
1335 id_is_ip=true;
1336 } else {
1337 id_is_ip=false;
1338 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1339 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1340 strcpy(user,ip);
1341 id_is_ip=true;
1342 }
1343 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1344 continue;
1345 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1346 strcpy(user,"everybody");
1347 } else {
1348 strlow(user);
1349 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1350 if((str = strchr(user,'_')) != 0) {
1351 strcpy(warea,str+1);
1352 strcpy(user,warea);
1353 }
1354 if((str = strchr(user,'+')) != 0) {
1355 strcpy(warea,str+1);
1356 strcpy(user,warea);
1357 }
1358 }
1359 }
1360 }
1361
1362 if(us[0] != '\0'){
1363 if(strcmp(user,us)!=0) continue;
1364 }
1365
1366 if(puser) {
1367 sprintf(wuser,":%s:",user);
1368 if(strstr(userfile, wuser) == 0)
1369 continue;
1370 }
1371
1372 if(fuser) {
1373 if(!vuexclude(user)) {
1374 if (debugm) printf(_("Excluded user: %s\n"),user);
1375 totregsx++;
1376 continue;
1377 }
1378 }
1379
1380 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1381 continue;
1382
1383 if(max_elapsed) {
1384 if(atol(elap)>max_elapsed) {
1385 elap[0]='0';
1386 elap[1]='\0';
1387 }
1388 }
1389
1390 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1391 fixendofline(str);
1392 sprintf(smartfilter,"\"%s\"",str+1);
1393 } else sprintf(smartfilter,"\"\"");
1394
1395 nopen=0;
1396 prev_ufile=NULL;
1397 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1398 prev_ufile=ufile;
1399 if (ufile->file) nopen++;
1400 }
1401 if (!ufile) {
1402 ufile=malloc(sizeof(*ufile));
1403 if (!ufile) {
1404 debuga(_("Not enough memory to store the user %s\n"),user);
1405 exit(EXIT_FAILURE);
1406 }
1407 memset(ufile,0,sizeof(*ufile));
1408 ufile->next=first_user_file;
1409 first_user_file=ufile;
1410 uinfo=userinfo_create(user);
1411 ufile->user=uinfo;
1412 uinfo->id_is_ip=id_is_ip;
1413 } else {
1414 if (prev_ufile) {
1415 prev_ufile->next=ufile->next;
1416 ufile->next=first_user_file;
1417 first_user_file=ufile;
1418 }
1419 }
1420
1421 if (ufile->file==NULL) {
1422 if (nopen>=maxopenfiles) {
1423 x=0;
1424 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1425 if (ufile1->file!=NULL) {
1426 if (x>=maxopenfiles) {
1427 if (fclose(ufile1->file)==EOF) {
1428 debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno));
1429 exit(EXIT_FAILURE);
1430 }
1431 ufile1->file=NULL;
1432 }
1433 x++;
1434 }
1435 }
1436 }
1437 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1438 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1439 exit(EXIT_FAILURE);
1440 }
1441 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1442 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1443 exit (1);
1444 }
1445 }
1446
1447 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1448 if ( fp_Write_User )
1449 fclose( fp_Write_User ) ;
1450 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1451
1452 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1453 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1454 exit (1);
1455 }
1456 strcpy( sz_Last_User , user ) ;
1457 }*/
1458 if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter)<=0) {
1459 debuga(_("Write error in the log file of user %s\n"),user);
1460 exit(EXIT_FAILURE);
1461 }
1462
1463 if(fp_log && ilf!=ILF_Sarg)
1464 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1465
1466 totregsg++;
1467
1468 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1469 ndownload = 1;
1470
1471 if ( ! fp_Download_Unsort ) {
1472 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1473 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1474 exit (1);
1475 }
1476 }
1477 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1478 }
1479
1480 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1481 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1482 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1483 denied_count++;
1484 }
1485 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1486 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1487 authfail_count++;
1488 }
1489 }
1490
1491 if (ilf!=ILF_Sarg) {
1492 if(!totper || idata<mindate){
1493 mindate=idata;
1494 memcpy(&period.start,t,sizeof(*t));
1495 strcpy(start_hour,tbuf2);
1496 }
1497 if (!totper || idata>maxdate) {
1498 maxdate=idata;
1499 memcpy(&period.end,t,sizeof(*t));
1500 }
1501 totper=true;
1502 }
1503
1504 if(debugm){
1505 printf("IP=\t%s\n",ip);
1506 printf("USER=\t%s\n",user);
1507 printf("ELAP=\t%s\n",elap);
1508 printf("DATE=\t%s\n",dia);
1509 printf("TIME=\t%s\n",hora);
1510 printf("FUNC=\t%s\n",fun);
1511 printf("URL=\t%s\n",url);
1512 printf("CODE=\t%s\n",code);
1513 printf("LEN=\t%s\n",tam);
1514 }
1515 }
1516 if (!from_stdin) {
1517 if (from_pipe)
1518 pclose(fp_in);
1519 else
1520 fclose(fp_in);
1521 if( ShowReadStatistics )
1522 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1523 }
1524 }
1525
1526 if (debug)
1527 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1528
1529 longline_destroy(&line);
1530 if ( fp_Download_Unsort )
1531 fclose (fp_Download_Unsort);
1532
1533 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1534 ufile1=ufile->next;
1535 if (ufile->file!=NULL) fclose(ufile->file);
1536 free(ufile);
1537 }
1538
1539 free_download();
1540 free_excludecodes();
1541 free_exclude();
1542
1543 if(debug) {
1544 int totalcount=0;
1545
1546 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1547
1548 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1549 debuga(_("Log with mixed records format (squid and common log)\n"));
1550
1551 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1552 debuga(_("Common log format\n"));
1553
1554 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1555 debuga(_("Squid log format\n"));
1556
1557 if(ilf_count[ILF_Sarg]>0)
1558 debuga(_("Sarg log format\n"));
1559
1560 if(totalcount==0 && totregsg)
1561 debuga(_("Log with invalid format\n"));
1562 }
1563
1564 if(!totregsg){
1565 debuga(_("No records found\n"));
1566 debuga(_("End\n"));
1567 if(fp_denied) fclose(fp_denied);
1568 if(fp_authfail) fclose(fp_authfail);
1569 userinfo_free();
1570 if(userfile) free(userfile);
1571 close_usertab();
1572 exit(EXIT_SUCCESS);
1573 }
1574
1575 if (date[0]!='\0') {
1576 char date0[30], date1[30];
1577
1578 strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start);
1579 strftime(date1,sizeof(date1),"%d/%m/%Y",&period.end);
1580 debuga(_("Period covered by log files: %s-%s\n"),date0,date1);
1581 getperiod_fromrange(&period,dfrom,duntil);
1582 }
1583 if (getperiod_buildtext(&period)<0) {
1584 debuga(_("Failed to build the string representation of the date range\n"));
1585 exit(EXIT_FAILURE);
1586 }
1587
1588 if(debugz){
1589 debugaz("data",dia);
1590 debugaz("period",period.text);
1591 }
1592
1593 if(debug)
1594 debuga(_("Period: %s\n"),period.text);
1595
1596 // fclose(fp_ou);
1597 if(fp_denied)
1598 fclose(fp_denied);
1599 if(fp_authfail)
1600 fclose(fp_authfail);
1601
1602 if(fp_log != NULL) {
1603 fclose(fp_log);
1604 strcpy(end_hour,tbuf2);
1605 strftime(val2,sizeof(val2),"%d%m%Y",&period.start);
1606 strftime(val1,sizeof(val1),"%d%m%Y",&period.end);
1607 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1608 if (rename(arq_log,val4)) {
1609 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1610 } else {
1611 strcpy(arq_log,val4);
1612
1613 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1614 /*
1615 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1616 necessary around the command name, put them in the configuration file.
1617 */
1618 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1619 cstatus=system(val1);
1620 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1621 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1622 debuga(_("command: %s\n"),val1);
1623 exit(EXIT_FAILURE);
1624 }
1625 }
1626 }
1627 if(debug)
1628 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1629 }
1630
1631 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1632 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1633 cstatus=system(csort);
1634 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1635 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1636 debuga(_("sort command: %s\n"),csort);
1637 exit(EXIT_FAILURE);
1638 }
1639 unlink(denied_unsort);
1640 }
1641
1642 sort_users_log(tmp, debug);
1643
1644 if(DataFile[0] != '\0')
1645 data_file(tmp);
1646 else
1647 gerarel();
1648
1649 unlink(tmp2);
1650 if((ReportType & REPORT_TYPE_DENIED) != 0)
1651 unlink(denied_sort);
1652
1653 if(strcmp(tmp,"/tmp") != 0) {
1654 unlinkdir(tmp,0);
1655 }
1656
1657 userinfo_free();
1658 if(userfile)
1659 free(userfile);
1660 close_usertab();
1661
1662 if(debug)
1663 debuga(_("End\n"));
1664
1665 exit(EXIT_SUCCESS);
1666
1667 }
1668
1669
1670 static void getusers(const char *pwdfile, int debug)
1671 {
1672
1673 FILE *fp_usr;
1674 char buf[255];
1675 char *str;
1676 long int nreg=0;
1677
1678 if(debug)
1679 debuga(_("Loading password file from %s\n"),pwdfile);
1680
1681 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1682 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1683 exit(EXIT_FAILURE);
1684 }
1685
1686 if (fseek(fp_usr, 0, SEEK_END)==-1) {
1687 debuga(_("Failed to move till the end of the users file %s: %s\n"),pwdfile,strerror(errno));
1688 exit(EXIT_FAILURE);
1689 }
1690 nreg = ftell(fp_usr);
1691 if (nreg<0) {
1692 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1693 exit(EXIT_FAILURE);
1694 }
1695 nreg = nreg+5000;
1696 if (fseek(fp_usr, 0, SEEK_SET)==-1) {
1697 debuga(_("Failed to rewind the users file %s: %s\n"),pwdfile,strerror(errno));
1698 exit(EXIT_FAILURE);
1699 }
1700
1701 if((userfile=(char *) malloc(nreg))==NULL){
1702 debuga(_("malloc error (%ld)\n"),nreg);
1703 exit(EXIT_FAILURE);
1704 }
1705
1706 bzero(userfile,nreg);
1707 strcpy(userfile,":");
1708
1709 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1710 str=strchr(buf,':');
1711 if (!str) {
1712 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1713 exit(EXIT_FAILURE);
1714 }
1715 str[1]='\0';
1716 strcat(userfile,buf);
1717 }
1718
1719 fclose(fp_usr);
1720
1721 return;
1722 }