git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Mimic an opaque object to read long lines
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/*
 * Progress-report interval. main() seeds its OutputNonZero countdown with this
 * value; while read statistics are enabled and the input is not stdin, the
 * countdown is decremented for each log line that reaches the statistics check
 * and, on hitting zero, the "Records in file ... reading" percentage is
 * reprinted and the countdown is reloaded with this value.
 */
30 #define REPORT_EVERY_X_LINES 5000
/*
 * Initial value main() assigns to maxopenfiles. The name suggests a cap on the
 * number of per-user files kept open at once, but the code that enforces it is
 * not in the reviewed part of main() -- TODO confirm.
 */
31 #define MAX_OPEN_USER_FILES 10
32
/*
 * Associates a user record with a stdio stream. The self-referential `next`
 * pointer lets entries be chained; main() declares first_user_file, ufile,
 * ufile1 and prev_ufile of this type, which looks like a singly linked list
 * of per-user files (the list handling itself is outside the reviewed part
 * of main() -- TODO confirm).
 */
33 struct userfilestruct
34 {
35 struct userfilestruct *next; // following entry in the chain (presumably NULL at the tail -- verify)
36 struct userinfostruct *user; // user record this entry belongs to; struct userinfostruct is declared elsewhere
37 FILE *file; // stdio stream tied to that user (presumably the user's open temporary file -- TODO confirm)
38 };
39
// File-local string pointer that starts out NULL. Nothing in the reviewed part
// of this file assigns or reads it; presumably it is managed by getusers() --
// TODO confirm.
40 static char *userfile=NULL;
41
// Externally visible (non-static) defaults listing every weekday number
// (0-6, count 7) and every hour (0-23, count 24). numlist is declared
// elsewhere; judging from these initializers it is { values[], count }.
// NOTE(review): presumably these select which days/hours are reported and can
// be overridden by the configuration -- confirm against the config parser.
42 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
43 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
44 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
45
// Forward declaration of a file-local helper defined later in this file.
// main() calls it as getusers(PasswdFile,debug) when PasswdFile is readable.
46 static void getusers(const char *pwdfile, int debug);
47
48 int main(int argc,char *argv[])
49 {
50 enum isa_col_id {
51 ISACOL_Ip,
52 ISACOL_UserName,
53 ISACOL_Date,
54 ISACOL_Time,
55 ISACOL_TimeTaken,
56 ISACOL_Bytes,
57 ISACOL_Uri,
58 ISACOL_Status,
59 ISACOL_Last //last entry of the list !
60 };
61 enum InputLogFormat {
62 ILF_Unknown,
63 ILF_Squid,
64 ILF_Common,
65 ILF_Sarg,
66 ILF_Isa,
67 ILF_Last //last entry of the list !
68 };
69
70 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
71
72 char sz_Download_Unsort[ 20000 ] ;
73 FILE * fp_Download_Unsort = NULL ;
74
75 extern int optind;
76 extern int optopt;
77 extern char *optarg;
78
79 char data[255];
80 char elap[255];
81 char ip[MAXLEN];
82 char tam[255];
83 char fun[MAXLEN];
84 char wuser[MAXLEN];
85 char smartfilter[MAXLEN];
86 char dia[128];
87 char wdata[128];
88 char mes[30];
89 char ano[30];
90 char hora[30];
91 char wtemp[MAXLEN];
92 char wtemp2[255];
93 char date[255];
94 char arq[255];
95 char arq_log[255];
96 char hm[15], hmf[15], hmr[15];
97 int chm=0;
98 char uagent[MAXLEN];
99 char hexclude[MAXLEN];
100 char csort[MAXLEN];
101 int cstatus;
102 char tbuf[128];
103 char tbuf2[128];
104 char zip[20];
105 char *str;
106 char tmp2[MAXLEN];
107 char tmp3[MAXLEN];
108 char denied_unsort[MAXLEN];
109 char denied_sort[MAXLEN];
110 char authfail_unsort[MAXLEN];
111 char start_hour[128];
112 char end_hour[128];
113 char *linebuf;
114 char hostname[512];
115 char *url;
116 char *urly;
117 char user[MAX_USER_LEN];
118 enum InputLogFormat ilf;
119 int ilf_count[ILF_Last];
120 int ch;
121 int x;
122 int errflg=0;
123 int puser=0;
124 int fhost=0;
125 int dns=0;
126 int fuser=0;
127 int idata=0;
128 int mindate=0;
129 int iarq=0;
130 int exstring=0;
131 int isa_ncols=0,isa_cols[ISACOL_Last];
132 int from_stdin;
133 int blen;
134 int maxopenfiles;
135 int nopen;
136 int id_is_ip;
137 long totregsl=0;
138 long totregsg=0;
139 long totregsx=0;
140 long totper=0;
141 long int max_elapsed=0;
142 time_t tt;
143 struct tm *t;
144 unsigned long recs1=0UL;
145 unsigned long recs2=0UL;
146 int OutputNonZero = REPORT_EVERY_X_LINES ;
147 int download_flag=0;
148 char *download_url=NULL;
149 struct getwordstruct gwarea;
150 longline line;
151 struct userinfostruct *uinfo;
152 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
153
154 #ifdef HAVE_LOCALE_H
155 setlocale(LC_TIME,"");
156 #endif
157
158 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
159 setlocale (LC_ALL, "");
160 bindtextdomain (PACKAGE_NAME, LOCALEDIR);
161 textdomain (PACKAGE_NAME);
162 #endif //ENABLE_NLS
163
164 BgImage[0]='\0';
165 LogoImage[0]='\0';
166 LogoText[0]='\0';
167 PasswdFile[0]='\0';
168 OutputEmail[0]='\0';
169 UserAgentLog[0]='\0';
170 ExcludeHosts[0]='\0';
171 ExcludeUsers[0]='\0';
172 ConfigFile[0]='\0';
173 code[0]='\0';
174 LastLog=0;
175 ReportType=0UL;
176 UserTabFile[0]='\0';
177 BlockIt[0]='\0';
178 ExternalCSSFile[0]='\0';
179 SquidGuardLogFormat[0]='\0';
180 SquidGuardLogAlternate[0]='\0';
181 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
182
183 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
184 strcpy(GraphDaysBytesBarColor,"orange");
185 strcpy(BgColor,"#ffffff");
186 strcpy(TxColor,"#000000");
187 strcpy(TxBgColor,"lavender");
188 strcpy(TiColor,"darkblue");
189 strcpy(Width,"80");
190 strcpy(Height,"45");
191 strcpy(LogoTextColor,"#000000");
192 strcpy(HeaderColor,"darkblue");
193 strcpy(HeaderBgColor,"#dddddd");
194 strcpy(LogoTextColor,"#006699");
195 strcpy(FontSize,"9px");
196 strcpy(TempDir,"/tmp");
197 strcpy(OutputDir,"/var/www/html/squid-reports");
198 Ip2Name=0;
199 strcpy(DateFormat,"u");
200 OverwriteReport=0;
201 RemoveTempFiles=1;
202 strcpy(ReplaceIndex,"index.html");
203 Index=INDEX_YES;
204 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
205 UseComma=0;
206 strcpy(MailUtility,"mailx");
207 TopSitesNum=100;
208 TopUsersNum=0;
209 UserIp=0;
210 strcpy(TopuserSortField,"BYTES");
211 strcpy(UserSortField,"BYTES");
212 strcpy(TopuserSortOrder,"reverse");
213 strcpy(UserSortOrder,"reverse");
214 strcpy(TopsitesSortField,"CONNECT");
215 strcpy(TopsitesSortType,"D");
216 LongUrl=0;
217 strcpy(FontFace,"Verdana,Tahoma,Arial");
218 strcpy(datetimeby,"elap");
219 strcpy(CharSet,"ISO-8859-1");
220 Privacy=0;
221 strcpy(PrivacyString,"***.***.***.***");
222 strcpy(PrivacyStringColor,"blue");
223 SuccessfulMsg=1;
224 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
225 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
226 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
227 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
228 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
229 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
230 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
231 strcpy(DataFileDelimiter,";");
232 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
233 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
234 ShowReadStatistics=1;
235 strcpy(IndexSortOrder,"D");
236 ShowSargInfo=1;
237 ShowSargLogo=1;
238 strcpy(ParsedOutputLog,"no");
239 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
240 strcpy(DisplayedValues,"abbreviation");
241 strcpy(HeaderFontSize,"9px");
242 strcpy(TitleFontSize,"11px");
243 strcpy(AuthUserTemplateFile,"sarg_htaccess");
244 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
245 Graphs=1;
246 #if defined(FONTDIR)
247 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
248 #else
249 GraphFont[0]='\0';
250 #endif
251 strcpy(Ulimit,"20000");
252 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
253 IndexTree=INDEX_TREE_FILE;
254 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
255 strcpy(RealtimeUnauthRec,"show");
256 SquidguardIgnoreDate=0;
257 DansguardianIgnoreDate=0;
258 DataFileUrl=DATAFILEURL_IP;
259 strcpy(MaxElapsed,"28800000");
260 BytesInSitesUsersReport=0;
261 UserAuthentication=0;
262 strcpy(LDAPHost,"127.0.0.1");
263 LDAPPort=389;
264 LDAPProtocolVersion=3;
265 LDAPBindDN[0]='\0';
266 LDAPBindPW[0]='\0';
267 LDAPBaseSearch[0]='\0';
268 strcpy(LDAPFilterSearch, "uid=%s");
269 strcpy(LDAPTargetAttr, "cn");
270
271 dia[0]='\0';
272 mes[0]='\0';
273 ano[0]='\0';
274 hora[0]='\0';
275 tmp[0]='\0';
276 tmp2[0]='\0';
277 tmp3[0]='\0';
278 wtemp[0]='\0';
279 wtemp2[0]='\0';
280 us[0]='\0';
281 date[0]='\0';
282 df[0]='\0';
283 uagent[0]='\0';
284 hexclude[0]='\0';
285 addr[0]='\0';
286 hm[0]='\0';
287 hmf[0]='\0';
288 site[0]='\0';
289 outdir[0]='\0';
290 elap[0]='\0';
291 email[0]='\0';
292 zip[0]='\0';
293 UserInvalidChar[0]='\0';
294 DataFile[0]='\0';
295 SquidGuardConf[0]='\0';
296 DansGuardianConf[0]='\0';
297 start_hour[0]='\0';
298 end_hour[0]='\0';
299
300 denied_count=0;
301 download_count=0;
302 authfail_count=0;
303 dansguardian_count=0;
304 squidguard_count=0;
305 useragent_count=0;
306 DeniedReportLimit=10;
307 AuthfailReportLimit=10;
308 DansGuardianReportLimit=10;
309 SquidGuardReportLimit=10;
310 DownloadReportLimit=50;
311 UserReportLimit=0;
312 debug=0;
313 debugz=0;
314 debugm=0;
315 iprel=0;
316 userip=0;
317 color1=0;
318 color2=0;
319 color3=0;
320 realt=0;
321 realtime_refresh=3;
322 realtime_access_log_lines=1000;
323 cost=0.01;
324 nocost=50000000;
325 ndownload=0;
326 squid24=0;
327
328 bzero(IncludeUsers, sizeof(IncludeUsers));
329 bzero(ExcludeString, sizeof(ExcludeString));
330 first_user_file=NULL;
331
332 NAccessLog=0;
333 for(x=0; x<MAXLOGS; x++)
334 AccessLog[x][0]='\0';
335 AccessLogFromCmdLine=0;
336
337 strcpy(Title,_("Squid User Access Report"));
338
339 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
340 switch(ch)
341 {
342 case 'a':
343 strcpy(addr,optarg);
344 break;
345 case 'b':
346 strcpy(uagent,optarg);
347 break;
348 case 'c':
349 strcpy(hexclude,optarg);
350 break;
351 case 'd':
352 strncpy(date,optarg,sizeof(date)-1);
353 date[sizeof(date)-1]='\0';
354 getword_start(&gwarea,optarg);
355 if (getword(cdfrom,sizeof(cdfrom),&gwarea,'-')<0 || getword(cduntil,sizeof(cduntil),&gwarea,0)<0) {
356 printf(_("SARG: The date range requested on the command line by option -d is invalid.\n"));
357 exit(EXIT_FAILURE);
358 }
359 date_from(date, cdfrom, cduntil);
360 dfrom=atoi(cdfrom);
361 duntil=atoi(cduntil);
362 break;
363 case 'e':
364 strcpy(email,optarg);
365 break;
366 case 'f':
367 strcpy(ConfigFile,optarg);
368 break;
369 case 'g':
370 strcpy(df,optarg);
371 break;
372 case 'h':
373 usage(argv[0]);
374 exit(0);
375 break;
376 case 'i':
377 iprel++;
378 break;
379 case 'l':
380 if (NAccessLog>=MAXLOGS) {
381 printf(_("SARG: Too many log files passed on command line with option -l.\n"));
382 exit(EXIT_FAILURE);
383 }
384 strcpy(AccessLog[NAccessLog],optarg);
385 NAccessLog++;
386 AccessLogFromCmdLine++;
387 break;
388 case 'L':
389 strcpy(SquidGuardLogAlternate,optarg);
390 break;
391 case 'm':
392 debugm++;
393 break;
394 case 'n':
395 dns++;
396 break;
397 case 'o':
398 strcpy(outdir,optarg);
399 break;
400 case 'p':
401 userip++;
402 break;
403 case 'r':
404 realt++;
405 break;
406 case 's':
407 strcpy(site,optarg);
408 break;
409 case 't':
410 {
411 int h,m;
412
413 if(strstr(optarg,"-") == 0) {
414 strcpy(hm,optarg);
415 strcpy(hmf,optarg);
416 } else {
417 getword_start(&gwarea,optarg);
418 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,0)<0) {
419 debuga(_("The time range passed on the command line with option -t is invalid\n"));
420 exit(EXIT_FAILURE);
421 }
422 }
423 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
424 debuga(_("Time period must be MM or MM:SS. Exit\n"));
425 exit(EXIT_FAILURE);
426 }
427 sprintf(hm,"%02d%02d",h,m);
428 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
429 debuga(_("Time period must be MM or MM:SS. Exit\n"));
430 exit(EXIT_FAILURE);
431 }
432 sprintf(hmf,"%02d%02d",h,m);
433 break;
434 }
435 case 'u':
436 strcpy(us,optarg);
437 break;
438 case 'v':
439 version();
440 break;
441 case 'w':
442 strcpy(tmp,optarg);
443 break;
444 case 'x':
445 debug++;
446 break;
447 case 'y':
448 langcode++;
449 break;
450 case 'z':
451 debugz++;
452 break;
453 case ':':
454 debuga(_("Option -%c require an argument\n"),optopt);
455 errflg++;
456 break;
457 case '?':
458 usage(argv[0]);
459 exit(EXIT_FAILURE);
460 break;
461 }
462
463 }
464
465 if (errflg) {
466 usage(argv[0]);
467 exit(2);
468 }
469
470 if(debug) debuga(_("Init\n"));
471
472 if(ConfigFile[0] == '\0') sprintf(ConfigFile,"%s/sarg.conf",SYSCONFDIR);
473 if(access(ConfigFile, R_OK) != 0) {
474 debuga(_("Cannot open config file: %s - %s\n"),ConfigFile,strerror(errno));
475 exit(EXIT_FAILURE);
476 }
477
478 if(access(ConfigFile, R_OK) == 0)
479 getconf();
480
481 if(UserIp) userip++;
482
483 if(dns) Ip2Name=1;
484
485 if(realt) {
486 realtime();
487 exit(0);
488 }
489
490 if(IndexTree == INDEX_TREE_FILE)
491 strcpy(ImageFile,"../images");
492 else
493 strcpy(ImageFile,"../../../images");
494
495 dataonly=0;
496 if(DataFile[0] != '\0')
497 dataonly++;
498
499 if(!NAccessLog) {
500 strcpy(AccessLog[0],"/var/log/squid/access.log");
501 NAccessLog++;
502 }
503
504 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
505 convlog(AccessLog[0], df, dfrom, duntil);
506 exit(0);
507 }
508
509 if(strcmp(site,"plit") == 0) {
510 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
511 exit(0);
512 }
513
514 load_excludecodes(ExcludeCodes);
515
516 if(access(PasswdFile, R_OK) == 0) {
517 getusers(PasswdFile,debug);
518 puser++;
519 }
520
521 if(hexclude[0] == '\0')
522 strcpy(hexclude,ExcludeHosts);
523 if(hexclude[0] != '\0') {
524 gethexclude(hexclude,debug);
525 fhost++;
526 }
527
528 if(ReportType == 0) {
529 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
530 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
531 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
532 }
533
534 if(access(ExcludeUsers, R_OK) == 0) {
535 getuexclude(ExcludeUsers,debug);
536 fuser++;
537 }
538
539 indexonly=0;
540 if(fuser) {
541 if(is_indexonly())
542 indexonly++;
543 }
544 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
545 if(Index == INDEX_ONLY) indexonly++;
546
547 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
548
549 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
550 strcat(outdir,"/");
551
552 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
553
554 if(tmp[0] == '\0') strcpy(tmp,TempDir);
555 else strcpy(TempDir,tmp);
556
557 if(df[0] == '\0') strcpy(df,DateFormat);
558 else strcpy(DateFormat,df);
559
560 if(df[0] == '\0') {
561 strcpy(df,"u");
562 strcpy(DateFormat,"u");
563 }
564
565 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
566
567 strcpy(tmp2,tmp);
568
569 if(email[0] != '\0') {
570 sprintf(wtemp2,"%s/sarg",tmp2);
571 my_mkdir(wtemp2);
572 strcat(tmp2,"/sarg");
573 strcpy(outdir,tmp2);
574 strcat(outdir,"/");
575 }
576
577 strcat(tmp2,"/sarg.log");
578
579 sprintf(tmp3,"%s/sarg",tmp);
580 if(access(tmp3, R_OK) == 0) {
581 unlinkdir(tmp3,1);
582 }
583 my_mkdir(tmp3);
584 strcpy(denied_unsort,tmp3);
585 strcpy(denied_sort,tmp3);
586 strcpy(authfail_unsort,tmp3);
587 strcat(denied_unsort,"/denied.log.unsort");
588 strcat(denied_sort,"/denied.log");
589 strcat(authfail_unsort,"/authfail.log.unsort");
590
591 if(debug) {
592 debuga(_("Parameters:\n"));
593 debuga(_(" Hostname or IP address (-a) = %s\n"),addr);
594 debuga(_(" Useragent log (-b) = %s\n"),uagent);
595 debuga(_(" Exclude file (-c) = %s\n"),hexclude);
596 debuga(_(" Date from-until (-d) = %s\n"),date);
597 debuga(_(" Email address to send reports (-e) = %s\n"),email);
598 debuga(_(" Config file (-f) = %s\n"),ConfigFile);
599 if(strcmp(df,"e") == 0)
600 debuga(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
601 if(strcmp(df,"u") == 0)
602 debuga(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
603 if(strcmp(df,"w") == 0)
604 debuga(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
605 debuga(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
606 for (iarq=0 ; iarq<NAccessLog ; iarq++)
607 debuga(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
608 debuga(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
609 debuga(_(" Output dir (-o) = %s\n"),outdir);
610 debuga(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
611 debuga(_(" Accessed site (-s) = %s\n"),site);
612 debuga(_(" Time (-t) = %s\n"),hm);
613 debuga(_(" User (-u) = %s\n"),us);
614 debuga(_(" Temporary dir (-w) = %s\n"),tmp);
615 debuga(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
616 debuga(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
617 debuga("\n");
618 }
619
620 if(debugm) {
621 printf(_("Parameters:\n"));
622 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
623 printf(_(" Useragent log (-b) = %s\n"),uagent);
624 printf(_(" Exclude file (-c) = %s\n"),hexclude);
625 printf(_(" Date from-until (-d) = %s\n"),date);
626 printf(_(" Email address to send reports (-e) = %s\n"),email);
627 printf(_(" Config file (-f) = %s\n"),ConfigFile);
628 if(strcmp(df,"e") == 0)
629 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
630 if(strcmp(df,"u") == 0)
631 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
632 if(strcmp(df,"w") == 0)
633 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
634 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
635 for (iarq=0 ; iarq<NAccessLog ; iarq++)
636 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
637 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
638 printf(_(" Output dir (-o) = %s\n"),outdir);
639 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
640 printf(_(" Accessed site (-s) = %s\n"),site);
641 printf(_(" Time (-t) = %s\n"),hm);
642 printf(_(" User (-u) = %s\n"),us);
643 printf(_(" Temporary dir (-w) = %s\n"),tmp);
644 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
645 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
646 printf(_("sarg version: %s\n"),VERSION);
647 }
648
649 if(debug)
650 debuga(_("sarg version: %s\n"),VERSION);
651
652 maxopenfiles=MAX_OPEN_USER_FILES;
653 #ifdef HAVE_RLIM_T
654 if (Ulimit[0] != '\0') {
655 struct rlimit rl;
656 long l1, l2;
657 int rc=0;
658
659 #if defined(RLIMIT_NOFILE)
660 getrlimit (RLIMIT_NOFILE, &rl);
661 #elif defined(RLIMIT_OFILE)
662 getrlimit (RLIMIT_OFILE, &rl);
663 #else
664 #warning "No rlimit resource for the number of open files"
665 #endif
666 l1 = rl.rlim_cur;
667 l2 = rl.rlim_max;
668
669 rl.rlim_cur = atol(Ulimit);
670 rl.rlim_max = atol(Ulimit);
671 #if defined(RLIMIT_NOFILE)
672 rc=setrlimit (RLIMIT_NOFILE, &rl);
673 #elif defined(RLIMIT_OFILE)
674 rc=setrlimit (RLIMIT_OFILE, &rl);
675 #else
676 #warning "No rlimit resource for the number of open files"
677 #endif
678 if(rc == -1) {
679 debuga(_("setrlimit error - %s\n"),strerror(errno));
680 }
681
682 if(debug)
683 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
684 }
685 #endif
686
687 init_usertab(UserTabFile);
688
689 if ((line=longline_create())==NULL) {
690 debuga(_("Not enough memory to read a log file\n"));
691 exit(EXIT_FAILURE);
692 }
693
694 sprintf ( sz_Download_Unsort , "%s/sarg/download.unsort", tmp);
695
696 if(DataFile[0]=='\0') {
697 if((ReportType & REPORT_TYPE_DENIED) != 0) {
698 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
699 debuga(_("(log) Cannot open file: %s - %s\n"),denied_unsort,strerror(errno));
700 exit(EXIT_FAILURE);
701 }
702 }
703
704 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
705 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
706 debuga(_("(log) Cannot open file: %s - %s\n"),authfail_unsort,strerror(errno));
707 exit(EXIT_FAILURE);
708 }
709 }
710 }
711
712 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
713 strcpy(arq,AccessLog[iarq]);
714
715 strcpy(arqtt,arq);
716
717 if(strcmp(arq,"-")==0) {
718 if(debug)
719 debuga(_("Reading access log file: from stdin\n"));
720 fp_in=stdin;
721 from_stdin=1;
722 } else {
723 decomp(arq,zip,tmp);
724 if(debug)
725 debuga(_("Reading access log file: %s\n"),arq);
726 if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
727 debuga(_("(log) Cannot open log file: %s - %s\n"),arq,strerror(errno));
728 exit(EXIT_FAILURE);
729 }
730 from_stdin=0;
731 }
732 ilf=ILF_Unknown;
733 download_flag=0;
734 // pre-read the file only if we have to show stats
735 if(ShowReadStatistics && !from_stdin) {
736 size_t nread,i;
737 int skipcr=0;
738 char tmp4[MAXLEN];
739
740 recs1=0UL;
741 recs2=0UL;
742
743 while ((nread=fread(tmp4,1,sizeof(tmp4),fp_in))>0) {
744 for (i=0 ; i<nread ; i++)
745 if (skipcr) {
746 if (tmp4[i]!='\n' && tmp4[i]!='\r') {
747 skipcr=0;
748 }
749 } else {
750 if (tmp4[i]=='\n' || tmp4[i]=='\r') {
751 skipcr=1;
752 recs1++;
753 }
754 }
755 }
756 rewind(fp_in);
757 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
758 putchar('\r');
759 fflush( stdout ) ;
760 }
761
762 longline_reset(line);
763
764 while ((linebuf=longline_read(fp_in,line))!=NULL) {
765 blen=strlen(linebuf);
766
767 if (ilf==ILF_Unknown) {
768 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
769 fixendofline(linebuf);
770 if (debug)
771 debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
772 ilf=ILF_Isa;
773 ilf_count[ilf]++;
774 continue;
775 }
776
777 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
778 getword_start(&gwarea,arqtt);
779 if (getword_skip(2000,&gwarea,'-')<0 || getword(val2,sizeof(val2),&gwarea,'_')<0 ||
780 getword_skip(10,&gwarea,'-')<0 || getword(val3,sizeof(val3),&gwarea,'_')<0) {
781 debuga(_("The name of the file is invalid: %s\n"),arq);
782 exit(EXIT_FAILURE);
783 }
784 sprintf(period,"%s-%s",val2,val3);
785 ilf=ILF_Sarg;
786 ilf_count[ilf]++;
787 continue;
788 }
789 }
790
791 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
792 if(access(ParsedOutputLog,R_OK) != 0) {
793 my_mkdir(ParsedOutputLog);
794 }
795 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
796 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
797 debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
798 exit(EXIT_FAILURE);
799 }
800 fputs("*** SARG Log ***\n",fp_log);
801 }
802
803 recs2++;
804 if( ShowReadStatistics && !from_stdin && ! --OutputNonZero) {
805 perc = recs2 * 100 ;
806 perc = perc / recs1 ;
807 printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,perc);
808 putchar('\r');
809 fflush (stdout);
810 OutputNonZero = REPORT_EVERY_X_LINES ;
811 }
812 if(blen < 58) continue;
813 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
814 if(strstr(linebuf,"logfile turned over") != 0) continue;
815 if(linebuf[0] == ' ') continue;
816
817 // exclude_string
818 if(ExcludeString[0] != '\0') {
819 exstring=0;
820 getword_start(&gwarea,ExcludeString);
821 while(strchr(gwarea.current,':') != 0) {
822 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
823 debuga(_("Maybe you have a broken record or garbage in your exclusion string\n"));
824 exit(EXIT_FAILURE);
825 }
826 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL )
827 exstring++;
828 }
829 if((str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
830 exstring++;
831 if(exstring) continue;
832 }
833
834 totregsl++;
835 if(debugm)
836 printf("BUF=%s\n",linebuf);
837
838 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
839 getword_start(&gwarea,linebuf);
840 if (getword(data,sizeof(data),&gwarea,' ')<0) {
841 debuga(_("Maybe you have a broken time in your access.log file\n"));
842 exit(EXIT_FAILURE);
843 }
844 if((str=(char *) strchr(data, '.')) != (char *) NULL ) {
845 if((str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
846 strcpy(ip,data);
847 strcpy(elap,"0");
848 if(squid24) {
849 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
850 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
851 exit(EXIT_FAILURE);
852 }
853 } else {
854 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
855 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
856 exit(EXIT_FAILURE);
857 }
858 }
859 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
860 getword(fun,sizeof(fun),&gwarea,' ')<0) {
861 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
862 exit(EXIT_FAILURE);
863 }
864 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
865 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
866 exit(EXIT_FAILURE);
867 }
868 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(code2,sizeof(code2),&gwarea,' ')<0 ||
869 getword(tam,sizeof(tam),&gwarea,' ')<0) {
870 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
871 exit(EXIT_FAILURE);
872 }
873 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
874 if (getword(code,sizeof(code),&gwarea,' ')<0) {
875 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
876 exit(EXIT_FAILURE);
877 }
878 } else {
879 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
880 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
881 exit(EXIT_FAILURE);
882 }
883 }
884
885 if ((str = strchr(code, ':')) != NULL)
886 *str = '/';
887
888 if(strcmp(tam,"\0") == 0)
889 strcpy(tam,"0");
890
891 ilf=ILF_Common;
892 ilf_count[ilf]++;
893 }
894 }
895
896 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
897 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
898 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
899 exit(EXIT_FAILURE);
900 }
901 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
902 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
903 debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq);
904 exit(EXIT_FAILURE);
905 }
906 if(strlen(elap) < 1) continue;
907 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
908 debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq);
909 exit(EXIT_FAILURE);
910 }
911 if (getword(code,sizeof(code),&gwarea,' ')<0){
912 debuga(_("Maybe you have a broken result code in your %s file\n"),arq);
913 exit(EXIT_FAILURE);
914 }
915 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
916 debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq);
917 exit(EXIT_FAILURE);
918 }
919 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
920 debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
921 exit(EXIT_FAILURE);
922 }
923 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
924 debuga(_("Maybe you have a broken url in your %s file\n"),arq);
925 exit(EXIT_FAILURE);
926 }
927 if (getword(user,sizeof(user),&gwarea,' ')<0){
928 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
929 exit(EXIT_FAILURE);
930 }
931 ilf=ILF_Squid;
932 ilf_count[ilf]++;
933 }
934 }
935 if (ilf==ILF_Sarg) {
936 getword_start(&gwarea,linebuf);
937 if (getword(data,sizeof(data),&gwarea,'\t')<0){
938 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
939 exit(EXIT_FAILURE);
940 }
941 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
942 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
943 exit(EXIT_FAILURE);
944 }
945 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
946 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
947 exit(EXIT_FAILURE);
948 }
949 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
950 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
951 exit(EXIT_FAILURE);
952 }
953 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
954 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
955 exit(EXIT_FAILURE);
956 }
957 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
958 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
959 exit(EXIT_FAILURE);
960 }
961 if (getword(code,sizeof(code),&gwarea,'\t')<0){
962 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
963 exit(EXIT_FAILURE);
964 }
965 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
966 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
967 exit(EXIT_FAILURE);
968 }
969 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
970 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
971 exit(EXIT_FAILURE);
972 }
973 }
974 if (ilf==ILF_Isa) {
975 if (linebuf[0] == '#') {
976 int ncols,cols[ISACOL_Last];
977
978 fixendofline(linebuf);
979 getword_start(&gwarea,linebuf);
980 // remove the #Fields: column at the beginning of the line
981 if (getword_skip(1000,&gwarea,' ')<0){
982 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
983 exit(EXIT_FAILURE);
984 }
985 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
986 ncols=0;
987 while(gwarea.current[0] != '\0') {
988 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
989 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
990 exit(EXIT_FAILURE);
991 }
992 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
993 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
994 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
995 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
996 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
997 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
998 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
999 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1000 ncols++;
1001 }
1002 if (cols[ISACOL_Ip]>=0) {
1003 isa_ncols=ncols;
1004 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1005 isa_cols[ncols]=cols[ncols];
1006 }
1007 continue;
1008 }
1009 if (!isa_ncols) continue;
1010 getword_start(&gwarea,linebuf);
1011 for (x=0 ; x<isa_ncols ; x++) {
1012 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1013 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1014 exit(EXIT_FAILURE);
1015 }
1016 if (x==isa_cols[ISACOL_Ip]) {
1017 if (strlen(str)>=sizeof(ip)) {
1018 debuga(_("Maybe you have a broken IP in your %s file\n"),arq);
1019 exit(EXIT_FAILURE);
1020 }
1021 strcpy(ip,str);
1022 } else if (x==isa_cols[ISACOL_UserName]) {
1023 if (strlen(str)>=sizeof(user)) {
1024 debuga(_("Maybe you have a broken user ID in your %s file\n"),arq);
1025 exit(EXIT_FAILURE);
1026 }
1027 strcpy(user,str);
1028 } else if (x==isa_cols[ISACOL_Date]) {
1029 if (strlen(str)>=sizeof(data)) {
1030 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1031 exit(EXIT_FAILURE);
1032 }
1033 strcpy(data,str);
1034 } else if (x==isa_cols[ISACOL_Time]) {
1035 if (strlen(str)>=sizeof(hora)) {
1036 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1037 exit(EXIT_FAILURE);
1038 }
1039 strcpy(hora,str);
1040 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1041 if (strlen(str)>=sizeof(elap)) {
1042 debuga(_("Maybe you have a broken download duration in your %s file\n"),arq);
1043 exit(EXIT_FAILURE);
1044 }
1045 strcpy(elap,str);
1046 } else if (x==isa_cols[ISACOL_Bytes]) {
1047 if (strlen(str)>=sizeof(tam)) {
1048 debuga(_("Maybe you have a broken download size in your %s file\n"),arq);
1049 exit(EXIT_FAILURE);
1050 }
1051 strcpy(tam,str);
1052 } else if (x==isa_cols[ISACOL_Uri]) {
1053 url=str;
1054 } else if (x==isa_cols[ISACOL_Status]) {
1055 if (strlen(str)>=sizeof(code)) {
1056 debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
1057 exit(EXIT_FAILURE);
1058 }
1059 strcpy(code,str);
1060 }
1061 }
1062
1063 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1064 sprintf(val1,"DENIED/%s",code);
1065 strcpy(code,val1);
1066 }
1067 getword_start(&gwarea,data);
1068 if (getword(ano,sizeof(ano),&gwarea,'-')<0){
1069 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1070 exit(EXIT_FAILURE);
1071 }
1072 if (getword(mes,sizeof(mes),&gwarea,'-')<0){
1073 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1074 exit(EXIT_FAILURE);
1075 }
1076 if (getword(dia,sizeof(dia),&gwarea,'\0')<0){
1077 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
1078 exit(EXIT_FAILURE);
1079 }
1080 conv_month_name(mes);
1081 sprintf(data," %s/%s/%s:%s",dia,mes,ano,hora);
1082 }
1083
1084 if(ilf==ILF_Squid) {
1085 tt=atoi(data);
1086 t=localtime(&tt);
1087
1088 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1089 sprintf(mes,"%d",t->tm_mon+1);
1090 conv_month_name(mes);
1091 if(strncmp(df,"u",1) == 0)
1092 sprintf(tbuf, "%04d%s%02d", t->tm_year+1900, mes, t->tm_mday);
1093 if(strncmp(df,"e",1) == 0)
1094 sprintf(tbuf, "%02d%s%04d", t->tm_mday, mes, t->tm_year+1900);
1095 if(strncmp(df,"w",1) == 0) {
1096 IndexTree=INDEX_TREE_FILE;
1097 strftime(tbuf, sizeof(tbuf), "%Y.%U", t);
1098 }
1099
1100 strftime(wdata, sizeof(wdata), "%Y%m%d", t);
1101 idata=atoi(wdata);
1102
1103 if(strncmp(df,"u",1)==0)
1104 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
1105 else
1106 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1107 sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1108 } else if(ilf==ILF_Common || ilf==ILF_Isa) {
1109 getword_start(&gwarea,data+1);
1110 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
1111 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1112 exit(EXIT_FAILURE);
1113 }
1114 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
1115 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1116 exit(EXIT_FAILURE);
1117 }
1118 getword_start(&gwarea,data);
1119 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1120 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1121 exit(EXIT_FAILURE);
1122 }
1123 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1124 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1125 exit(EXIT_FAILURE);
1126 }
1127 if (getword(ano,sizeof(ano),&gwarea,'/')<0){
1128 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1129 exit(EXIT_FAILURE);
1130 }
1131
1132 if(strcmp(df,"u") == 0)
1133 snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia);
1134 if(strcmp(df,"e") == 0)
1135 snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano);
1136 builddia(dia,mes,ano,df,wdata);
1137 idata=atoi(wdata);
1138 tt=computedate(ano,mes,dia);
1139 t=localtime(&tt);
1140 } else if (ilf==ILF_Sarg) {
1141 getword_start(&gwarea,data);
1142 if(strcmp(df,"u") == 0) {
1143 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1144 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1145 exit(EXIT_FAILURE);
1146 }
1147 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1148 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1149 exit(EXIT_FAILURE);
1150 }
1151 } else {
1152 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1153 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1154 exit(EXIT_FAILURE);
1155 }
1156 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1157 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1158 exit(EXIT_FAILURE);
1159 }
1160 }
1161 if (getword(ano,sizeof(ano),&gwarea,0)<0){
1162 debuga(_("Maybe you have a broken date in your %s file\n"),arq);
1163 exit(EXIT_FAILURE);
1164 }
1165 snprintf(wdata,9,"%s%s%s",ano,mes,dia);
1166 idata=atoi(wdata);
1167 tt=computedate(ano,mes,dia);
1168 t=localtime(&tt);
1169 } else {
1170 t=NULL;
1171 }
1172
1173 if(debugm)
1174 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1175
1176 if(date[0] != '\0'){
1177 if(idata < dfrom || idata > duntil) continue;
1178 }
1179
1180 // Record only hours usage which is required
1181 if (t) {
1182 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
1183 sizeof( int ), compar ) == NULL )
1184 continue;
1185
1186 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
1187 sizeof( int ), compar ) == NULL )
1188 continue;
1189 }
1190
1191
1192 if(strlen(user) > MAX_USER_LEN) {
1193 if (debugm) printf(_("User ID too long: %s\n"),user);
1194 totregsx++;
1195 continue;
1196 }
1197
1198 // include_users
1199 if(IncludeUsers[0] != '\0') {
1200 sprintf(val1,":%s:",user);
1201 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1202 continue;
1203 }
1204
1205 if(vercode(code)) {
1206 if (debugm) printf(_("Excluded code: %s\n"),code);
1207 totregsx++;
1208 continue;
1209 }
1210
1211 if(testvaliduserchar(user))
1212 continue;
1213
1214 #if 0
1215 if((str = strstr(user,"%20")) != NULL) {
1216 /*
1217 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1218 the side effect is to truncate the name at the first space and merge the reports
1219 of people whose name is identical up to the first space.
1220
1221 The old code used to truncate the user name at the first % if a %20 was
1222 found anywhere in the string. That means the string could be truncated
1223 at the wrong place if another % occurred before the %20. This new code should
1224 avoid that problem and only truncate at the space. There is no bug
1225 report indicating that anybody noticed this.
1226 */
1227 *str='\0';
1228 }
1229
1230 /*
1231 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1232 found in the user name.
1233 */
1234 while((str = strstr(user,"%5c")) != NULL) {
1235 *str='.';
1236 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1237 }
1238 #endif
1239
1240 urly=url;
1241
1242 if(ilf!=ILF_Sarg) {
1243 /*
1244 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1245 a downloaded file.
1246 */
1247 download_flag=is_download_suffix(url);
1248 if (download_flag) {
1249 download_url=url;
1250 download_count++;
1251 }
1252 } else
1253 download_flag=0;
1254
1255 // remove any protocol:// at the beginning of the URL
1256 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1257 int i;
1258
1259 str+=2;
1260 for (i=0 ; str[i] ; i++)
1261 url[i]=str[i];
1262 url[i]='\0';
1263 }
1264
1265 if(!LongUrl) {
1266 url_hostname(url,hostname,sizeof(hostname));
1267 url=hostname;
1268 }
1269
1270 if(url[0] == '\0') continue;
1271
1272 if(addr[0] != '\0'){
1273 if(strcmp(addr,ip)!=0) continue;
1274 }
1275 if(fhost) {
1276 if(!vhexclude(url)) {
1277 if (debugm) printf(_("Excluded site: %s\n"),url);
1278 totregsx++;
1279 continue;
1280 }
1281 }
1282
1283 if(hm[0] != '\0') {
1284 hmr[0]='\0';
1285 chm++;
1286 getword_start(&gwarea,hora);
1287 while(chm) {
1288 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1289 debuga(_("Maybe you have a broken time in your %s file\n"),arq);
1290 exit(EXIT_FAILURE);
1291 }
1292 strncat(hmr,warea,2);
1293 chm--;
1294 }
1295 strncat(hmr,gwarea.current,2);
1296
1297 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1298 }
1299
1300 if(site[0] != '\0'){
1301 if(strstr(url,site)==0) continue;
1302 }
1303
1304 if(userip) {
1305 strcpy(user,ip);
1306 id_is_ip=1;
1307 } else {
1308 id_is_ip=0;
1309 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1310 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1311 strcpy(user,ip);
1312 id_is_ip=1;
1313 }
1314 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1315 continue;
1316 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1317 strcpy(user,"everybody");
1318 } else {
1319 strlow(user);
1320 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1321 if((str = strchr(user,'_')) != 0) {
1322 strcpy(warea,str+1);
1323 strcpy(user,warea);
1324 }
1325 if((str = strchr(user,'+')) != 0) {
1326 strcpy(warea,str+1);
1327 strcpy(user,warea);
1328 }
1329 }
1330 }
1331 }
1332
1333 if(us[0] != '\0'){
1334 if(strcmp(user,us)!=0) continue;
1335 }
1336
1337 if(puser) {
1338 sprintf(wuser,":%s:",user);
1339 if(strstr(userfile, wuser) == 0)
1340 continue;
1341 }
1342
1343 if(fuser) {
1344 if(!vuexclude(user)) {
1345 if (debugm) printf(_("Excluded user: %s\n"),user);
1346 totregsx++;
1347 continue;
1348 }
1349 }
1350
1351 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1352 continue;
1353
1354 if(max_elapsed) {
1355 if(atol(elap)>max_elapsed) {
1356 elap[0]='0';
1357 elap[1]='\0';
1358 }
1359 }
1360
1361 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1362 fixendofline(str);
1363 sprintf(smartfilter,"\"%s\"",str+1);
1364 } else sprintf(smartfilter,"\"\"");
1365
1366 nopen=0;
1367 prev_ufile=NULL;
1368 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1369 prev_ufile=ufile;
1370 if (ufile->file) nopen++;
1371 }
1372 if (!ufile) {
1373 ufile=malloc(sizeof(*ufile));
1374 if (!ufile) {
1375 debuga(_("Not enough memory to store the user %s\n"),user);
1376 exit(EXIT_FAILURE);
1377 }
1378 memset(ufile,0,sizeof(*ufile));
1379 ufile->next=first_user_file;
1380 first_user_file=ufile;
1381 uinfo=userinfo_create(user);
1382 ufile->user=uinfo;
1383 uinfo->id_is_ip=id_is_ip;
1384 } else {
1385 if (prev_ufile) {
1386 prev_ufile->next=ufile->next;
1387 ufile->next=first_user_file;
1388 first_user_file=ufile;
1389 }
1390 }
1391
1392 if (ufile->file==NULL) {
1393 if (nopen>=maxopenfiles) {
1394 x=0;
1395 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1396 if (ufile1->file!=NULL) {
1397 if (x>=maxopenfiles) {
1398 fclose(ufile1->file);
1399 ufile1->file=NULL;
1400 }
1401 x++;
1402 }
1403 }
1404 }
1405 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1406 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort\n"), tmp, ufile->user->filename);
1407 exit(EXIT_FAILURE);
1408 }
1409 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1410 debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno));
1411 exit (1);
1412 }
1413 }
1414
1415 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1416 if ( fp_Write_User )
1417 fclose( fp_Write_User ) ;
1418 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1419
1420 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1421 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1422 exit (1);
1423 }
1424 strcpy( sz_Last_User , user ) ;
1425 }*/
1426 fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter);
1427
1428 if(fp_log && ilf!=ILF_Sarg)
1429 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1430
1431 totregsg++;
1432
1433 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1434 ndownload = 1;
1435
1436 if ( ! fp_Download_Unsort ) {
1437 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1438 debuga(_("(log) Cannot open temporary file: %s - %s\n"),tmp3, strerror(errno));
1439 exit (1);
1440 }
1441 }
1442 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1443 }
1444
1445 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1446 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1447 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1448 denied_count++;
1449 }
1450 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1451 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1452 authfail_count++;
1453 }
1454 }
1455
1456 if((!totper || idata<mindate) && ilf!=ILF_Sarg){
1457 totper++;
1458 mindate=idata;
1459 sprintf(period,"%s-",tbuf);
1460 strcpy(start_hour,tbuf2);
1461 if(date[0] != '\0')
1462 fixper(tbuf, period, cduntil);
1463 if(debugz){
1464 debugaz("tbuf",tbuf);
1465 debugaz("period",period);
1466 }
1467 }
1468
1469 if(debugm){
1470 printf("IP=\t%s\n",ip);
1471 printf("USER=\t%s\n",user);
1472 printf("ELAP=\t%s\n",elap);
1473 printf("DATE=\t%s\n",dia);
1474 printf("TIME=\t%s\n",hora);
1475 printf("FUNC=\t%s\n",fun);
1476 printf("URL=\t%s\n",url);
1477 printf("CODE=\t%s\n",code);
1478 printf("LEN=\t%s\n",tam);
1479 }
1480 }
1481 if (!from_stdin) {
1482 fclose(fp_in);
1483 if( ShowReadStatistics )
1484 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 );
1485 }
1486 }
1487
1488 longline_destroy(&line);
1489 if ( fp_Download_Unsort )
1490 fclose (fp_Download_Unsort);
1491
1492 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1493 ufile1=ufile->next;
1494 if (ufile->file!=NULL) fclose(ufile->file);
1495 free(ufile);
1496 }
1497
1498 free_download();
1499 free_excludecodes();
1500 free_exclude();
1501
1502 if(debug) {
1503 int totalcount=0;
1504
1505 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1506
1507 debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
1508
1509 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1510 debuga(_("Log with mixed records format (squid and common log)\n"));
1511
1512 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1513 debuga(_("Common log format\n"));
1514
1515 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1516 debuga(_("Squid log format\n"));
1517
1518 if(ilf_count[ILF_Sarg]>0)
1519 debuga(_("Sarg log format\n"));
1520
1521 if(totalcount==0) {
1522 if(!totregsg) {
1523 debuga(_("No records found\n"));
1524 debuga(_("End\n"));
1525 } else debuga(_("Log with invalid format\n"));
1526 if(fp_denied)
1527 fclose(fp_denied);
1528 if(fp_authfail)
1529 fclose(fp_authfail);
1530 userinfo_free();
1531 if(userfile)
1532 free(userfile);
1533 close_usertab();
1534 unlink(denied_unsort);
1535 unlink(authfail_unsort);
1536 unlink(tmp3);
1537 exit(0);
1538 }
1539 }
1540
1541 if(!totregsg){
1542 debuga(_("No records found\n"));
1543 debuga(_("End\n"));
1544 // fclose(fp_ou);
1545 if(fp_denied)
1546 fclose(fp_denied);
1547 if(fp_authfail)
1548 fclose(fp_authfail);
1549 userinfo_free();
1550 if(userfile)
1551 free(userfile);
1552 close_usertab();
1553 exit(0);
1554 }
1555
1556 if(date[0] == '\0' && ilf_count[ILF_Sarg]==0) {
1557 strcat(period,tbuf);
1558 }
1559
1560 if(debugz){
1561 debugaz("data",dia);
1562 debugaz("tbuf",tbuf);
1563 debugaz("period",period);
1564 }
1565
1566 if(debug)
1567 debuga(_("Period: %s\n"),period);
1568
1569 // fclose(fp_ou);
1570 if(fp_denied)
1571 fclose(fp_denied);
1572 if(fp_authfail)
1573 fclose(fp_authfail);
1574
1575 if(fp_log != NULL) {
1576 fclose(fp_log);
1577 strcpy(end_hour,tbuf2);
1578 getword_start(&gwarea,period);
1579 if (getword(val2,sizeof(val2),&gwarea,'-')<0){
1580 debuga(_("Maybe you have a broken date range definition.\n"));
1581 exit(EXIT_FAILURE);
1582 }
1583 if (getword(val1,sizeof(val1),&gwarea,'\0')<0){
1584 debuga(_("Maybe you have a broken date range definition.\n"));
1585 exit(EXIT_FAILURE);
1586 }
1587 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1588 if (rename(arq_log,val4)) {
1589 debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno));
1590 } else {
1591 strcpy(arq_log,val4);
1592
1593 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1594 /*
1595 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1596 necessary around the command name, put them in the configuration file.
1597 */
1598 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1599 cstatus=system(val1);
1600 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1601 debuga(_("command return status %d\n"),WEXITSTATUS(cstatus));
1602 debuga(_("command: %s\n"),val1);
1603 exit(EXIT_FAILURE);
1604 }
1605 }
1606 }
1607 if(debug)
1608 debuga(_("Sarg parsed log saved as %s\n"),arq_log);
1609 }
1610
1611 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1612 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1613 cstatus=system(csort);
1614 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1615 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
1616 debuga(_("sort command: %s\n"),csort);
1617 exit(EXIT_FAILURE);
1618 }
1619 unlink(denied_unsort);
1620 }
1621
1622 sort_users_log(tmp, debug);
1623
1624 if(DataFile[0] != '\0')
1625 data_file(tmp);
1626 else
1627 gerarel();
1628
1629 unlink(tmp2);
1630 if((ReportType & REPORT_TYPE_DENIED) != 0)
1631 unlink(denied_sort);
1632
1633 if(zip[0] != '\0' && strcmp(zip,"zcat") !=0) {
1634 recomp(arq, zip);
1635 }
1636 // else unlink(arq);
1637
1638 if(strcmp(tmp,"/tmp") != 0) {
1639 unlinkdir(tmp,0);
1640 }
1641
1642 userinfo_free();
1643 if(userfile)
1644 free(userfile);
1645 close_usertab();
1646
1647 if(debug)
1648 debuga(_("End\n"));
1649
1650 exit(0);
1651
1652 }
1653
1654
1655 static void getusers(const char *pwdfile, int debug)
1656 {
1657
1658 FILE *fp_usr;
1659 char buf[255];
1660 char *str;
1661 long int nreg=0;
1662
1663 if(debug)
1664 debuga(_("Loading password file from %s\n"),pwdfile);
1665
1666 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1667 debuga(_("(getusers) Cannot open file %s - %s\n"),pwdfile,strerror(errno));
1668 exit(EXIT_FAILURE);
1669 }
1670
1671 fseek(fp_usr, 0, SEEK_END);
1672 nreg = ftell(fp_usr);
1673 if (nreg<0) {
1674 debuga(_("Cannot get the size of file %s\n"),pwdfile);
1675 exit(EXIT_FAILURE);
1676 }
1677 nreg = nreg+5000;
1678 fseek(fp_usr, 0, SEEK_SET);
1679
1680 if((userfile=(char *) malloc(nreg))==NULL){
1681 debuga(_("malloc error (%ld)\n"),nreg);
1682 exit(EXIT_FAILURE);
1683 }
1684
1685 bzero(userfile,nreg);
1686 strcpy(userfile,":");
1687
1688 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1689 str=strchr(buf,':');
1690 if (!str) {
1691 debuga(_("You have an invalid user in your %s file\n"),pwdfile);
1692 exit(EXIT_FAILURE);
1693 }
1694 str[1]=0;
1695 strcat(userfile,buf);
1696 }
1697
1698 fclose(fp_usr);
1699
1700 return;
1701 }