git.ipfire.org Git - thirdparty/sarg.git/blob - log.c
Fix the creation of the datafile
[thirdparty/sarg.git] / log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Number of log lines read between two refreshes of the "reading: x%" progress
 * message: main() counts down from this value once per line and, on reaching
 * zero, reprints the message and restarts the countdown. */
30 #define REPORT_EVERY_X_LINES 5000
/* Starting value that main() assigns to its maxopenfiles variable.
 * NOTE(review): presumably the cap on per-user files kept open at the same
 * time; the code consuming maxopenfiles is outside this excerpt - confirm. */
31 #define MAX_OPEN_USER_FILES 10
32
/*
 * Self-referential list node that pairs a user record with a stdio stream.
 * main() declares the head of the chain (first_user_file, initialised to NULL)
 * together with the cursors used to walk it (ufile, ufile1, prev_ufile).
 * NOTE(review): presumably one node exists per user whose file is currently
 * open; the code that builds and prunes the list is outside this excerpt -
 * confirm.
 */
33 struct userfilestruct
34 {
35 struct userfilestruct *next; /* following node in the chain */
36 struct userinfostruct *user; /* user record this node refers to (type declared elsewhere) */
37 FILE *file; /* stdio stream associated with that user */
38 };
39
/* File-local string pointer, NULL until something assigns it.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * filled by getusers() - confirm. */
40 static char *userfile=NULL;
41
/* Week days (compared with tm_wday) whose log records are processed: main()
 * looks the record's week day up with bsearch() over weekdays.list /
 * weekdays.len and skips the record when it is not found. The initializer
 * lists all seven days followed by the element count, so nothing is filtered
 * unless the list is changed elsewhere.
 * NOTE(review): the list presumably has to stay in ascending order for the
 * bsearch() lookup; the comparison callback is not visible here - confirm. */
42 numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
/* Same mechanism for the hour of the day (compared with tm_hour): all 24 hours
 * are listed, followed by the element count. */
43 numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
44 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
45
/* Forward declaration; the definition lies outside this excerpt. main() calls
 * it with PasswdFile and the debug counter when PasswdFile is readable. */
46 static void getusers(const char *pwdfile, int debug);
47
48 int main(int argc,char *argv[])
49 {
50 enum isa_col_id {
51 ISACOL_Ip,
52 ISACOL_UserName,
53 ISACOL_Date,
54 ISACOL_Time,
55 ISACOL_TimeTaken,
56 ISACOL_Bytes,
57 ISACOL_Uri,
58 ISACOL_Status,
59 ISACOL_Last //last entry of the list !
60 };
61 enum InputLogFormat {
62 ILF_Unknown,
63 ILF_Squid,
64 ILF_Common,
65 ILF_Sarg,
66 ILF_Isa,
67 ILF_Last //last entry of the list !
68 };
69
70 FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
71
72 char sz_Download_Unsort[ 20000 ] ;
73 FILE * fp_Download_Unsort = NULL ;
74
75 extern int optind;
76 extern int optopt;
77 extern char *optarg;
78
79 char data[255];
80 char elap[255];
81 char ip[MAXLEN];
82 char tam[255];
83 char fun[MAXLEN];
84 char wuser[MAXLEN];
85 char smartfilter[MAXLEN];
86 char dia[128];
87 char wdata[128];
88 char mes[30];
89 char ano[30];
90 char hora[30];
91 char wtemp[MAXLEN];
92 char wtemp2[255];
93 char date[255];
94 char arq[255];
95 char arq_log[255];
96 char hm[15], hmf[15], hmr[15];
97 int chm=0;
98 char uagent[MAXLEN];
99 char hexclude[MAXLEN];
100 char csort[MAXLEN];
101 int cstatus;
102 char tbuf[128];
103 char tbuf2[128];
104 char zip[20];
105 char *str;
106 char tmp2[MAXLEN];
107 char tmp3[MAXLEN];
108 char denied_unsort[MAXLEN];
109 char denied_sort[MAXLEN];
110 char authfail_unsort[MAXLEN];
111 char start_hour[128];
112 char end_hour[128];
113 char *linebuf;
114 char hostname[512];
115 char *url;
116 char *urly;
117 char user[MAX_USER_LEN];
118 enum InputLogFormat ilf;
119 int ilf_count[ILF_Last];
120 int ch;
121 int x;
122 int errflg=0;
123 int puser=0;
124 int fhost=0;
125 int dns=0;
126 int fuser=0;
127 int idata=0;
128 int mindate=0;
129 int iarq=0;
130 int exstring=0;
131 int isa_ncols=0,isa_cols[ISACOL_Last];
132 int from_stdin;
133 int blen;
134 int maxopenfiles;
135 int nopen;
136 int id_is_ip;
137 long totregsl=0;
138 long totregsg=0;
139 long totregsx=0;
140 long totper=0;
141 long int max_elapsed=0;
142 time_t tt;
143 struct tm *t;
144 unsigned long recs1=0UL;
145 unsigned long recs2=0UL;
146 int OutputNonZero = REPORT_EVERY_X_LINES ;
147 int download_flag=0;
148 char *download_url=NULL;
149 struct getwordstruct gwarea;
150 struct longlinestruct line;
151 struct userinfostruct *uinfo;
152 struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile;
153
154 #ifdef HAVE_LOCALE_H
155 setlocale(LC_TIME,"");
156 #endif
157
158 #if defined(ENABLE_NLS) && defined(HAVE_LOCALE_H)
159 setlocale (LC_ALL, "");
160 bindtextdomain (PACKAGE_NAME, LOCALEDIR);
161 textdomain (PACKAGE_NAME);
162 #endif //ENABLE_NLS
163
164 BgImage[0]='\0';
165 LogoImage[0]='\0';
166 LogoText[0]='\0';
167 PasswdFile[0]='\0';
168 OutputEmail[0]='\0';
169 UserAgentLog[0]='\0';
170 ExcludeHosts[0]='\0';
171 ExcludeUsers[0]='\0';
172 ConfigFile[0]='\0';
173 code[0]='\0';
174 LastLog=0;
175 ReportType=0UL;
176 UserTabFile[0]='\0';
177 BlockIt[0]='\0';
178 ExternalCSSFile[0]='\0';
179 SquidGuardLogFormat[0]='\0';
180 SquidGuardLogAlternate[0]='\0';
181 for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
182
183 sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
184 strcpy(GraphDaysBytesBarColor,"orange");
185 strcpy(BgColor,"#ffffff");
186 strcpy(TxColor,"#000000");
187 strcpy(TxBgColor,"lavender");
188 strcpy(TiColor,"darkblue");
189 strcpy(Width,"80");
190 strcpy(Height,"45");
191 strcpy(LogoTextColor,"#000000");
192 strcpy(HeaderColor,"darkblue");
193 strcpy(HeaderBgColor,"#dddddd");
194 strcpy(LogoTextColor,"#006699");
195 strcpy(FontSize,"9px");
196 strcpy(TempDir,"/tmp");
197 strcpy(OutputDir,"/var/www/html/squid-reports");
198 Ip2Name=0;
199 strcpy(DateFormat,"u");
200 OverwriteReport=0;
201 RemoveTempFiles=1;
202 strcpy(ReplaceIndex,"index.html");
203 Index=INDEX_YES;
204 RecordsWithoutUser=RECORDWITHOUTUSER_IP;
205 UseComma=0;
206 strcpy(MailUtility,"mailx");
207 TopSitesNum=100;
208 TopUsersNum=0;
209 UserIp=0;
210 strcpy(TopuserSortField,"BYTES");
211 strcpy(UserSortField,"BYTES");
212 strcpy(TopuserSortOrder,"reverse");
213 strcpy(UserSortOrder,"reverse");
214 strcpy(TopsitesSortField,"CONNECT");
215 strcpy(TopsitesSortType,"D");
216 LongUrl=0;
217 strcpy(FontFace,"Verdana,Tahoma,Arial");
218 strcpy(datetimeby,"elap");
219 strcpy(CharSet,"ISO-8859-1");
220 Privacy=0;
221 strcpy(PrivacyString,"***.***.***.***");
222 strcpy(PrivacyStringColor,"blue");
223 SuccessfulMsg=1;
224 TopUserFields=TOPUSERFIELDS_NUM | TOPUSERFIELDS_DATE_TIME | TOPUSERFIELDS_USERID | TOPUSERFIELDS_CONNECT |
225 TOPUSERFIELDS_BYTES | TOPUSERFIELDS_SETYB | TOPUSERFIELDS_IN_CACHE_OUT |
226 TOPUSERFIELDS_USED_TIME | TOPUSERFIELDS_MILISEC | TOPUSERFIELDS_PTIME |
227 TOPUSERFIELDS_TOTAL | TOPUSERFIELDS_AVERAGE;
228 UserReportFields=USERREPORTFIELDS_CONNECT | USERREPORTFIELDS_BYTES | USERREPORTFIELDS_SETYB |
229 USERREPORTFIELDS_IN_CACHE_OUT | USERREPORTFIELDS_USED_TIME | USERREPORTFIELDS_MILISEC |
230 USERREPORTFIELDS_PTIME | USERREPORTFIELDS_TOTAL | USERREPORTFIELDS_AVERAGE;
231 strcpy(DataFileDelimiter,";");
232 DataFileFields=DATA_FIELD_USER | DATA_FIELD_DATE | DATA_FIELD_TIME | DATA_FIELD_URL | DATA_FIELD_CONNECT |
233 DATA_FIELD_BYTES | DATA_FIELD_IN_CACHE | DATA_FIELD_OUT_CACHE | DATA_FIELD_ELAPSED;
234 ShowReadStatistics=1;
235 strcpy(IndexSortOrder,"D");
236 ShowSargInfo=1;
237 ShowSargLogo=1;
238 strcpy(ParsedOutputLog,"no");
239 strcpy(ParsedOutputLogCompress,"/bin/gzip -f");
240 strcpy(DisplayedValues,"abbreviation");
241 strcpy(HeaderFontSize,"9px");
242 strcpy(TitleFontSize,"11px");
243 strcpy(AuthUserFile,"/usr/local/sarg/passwd");
244 strcpy(AuthName,"SARG, Restricted Access");
245 strcpy(AuthType,"basic");
246 strcpy(Require,"require user admin %u");
247 set_download_suffix("7z,ace,arj,avi,bat,bin,bz2,bzip,cab,com,cpio,dll,doc,dot,exe,gz,iso,lha,lzh,mdb,mov,mp3,mpeg,mpg,mso,nrg,ogg,ppt,rar,rtf,shs,src,sys,tar,tgz,vcd,vob,wma,wmv,zip");
248 Graphs=1;
249 #if defined(FONTDIR)
250 strcpy(GraphFont,FONTDIR"/DejaVuSans.ttf");
251 #else
252 GraphFont[0]='\0';
253 #endif
254 strcpy(Ulimit,"20000");
255 NtlmUserFormat=NTLMUSERFORMAT_DOMAINUSER;
256 IndexTree=INDEX_TREE_FILE;
257 strcpy(RealtimeTypes,"GET,PUT,CONNECT");
258 strcpy(RealtimeUnauthRec,"show");
259 SquidguardIgnoreDate=0;
260 DansguardianIgnoreDate=0;
261 DataFileUrl=DATAFILEURL_IP;
262 strcpy(MaxElapsed,"28800000");
263 BytesInSitesUsersReport=0;
264 UserAuthentication=0;
265 strcpy(LDAPHost,"127.0.0.1");
266 LDAPPort=389;
267 LDAPProtocolVersion=3;
268 LDAPBindDN[0]='\0';
269 LDAPBindPW[0]='\0';
270 LDAPBaseSearch[0]='\0';
271 strcpy(LDAPFilterSearch, "uid=%s");
272 strcpy(LDAPTargetAttr, "cn");
273
274 dia[0]='\0';
275 mes[0]='\0';
276 ano[0]='\0';
277 hora[0]='\0';
278 tmp[0]='\0';
279 tmp2[0]='\0';
280 tmp3[0]='\0';
281 wtemp[0]='\0';
282 wtemp2[0]='\0';
283 us[0]='\0';
284 date[0]='\0';
285 df[0]='\0';
286 uagent[0]='\0';
287 hexclude[0]='\0';
288 addr[0]='\0';
289 hm[0]='\0';
290 hmf[0]='\0';
291 site[0]='\0';
292 outdir[0]='\0';
293 elap[0]='\0';
294 email[0]='\0';
295 zip[0]='\0';
296 UserInvalidChar[0]='\0';
297 DataFile[0]='\0';
298 SquidGuardConf[0]='\0';
299 DansGuardianConf[0]='\0';
300 start_hour[0]='\0';
301 end_hour[0]='\0';
302
303 denied_count=0;
304 download_count=0;
305 authfail_count=0;
306 dansguardian_count=0;
307 squidguard_count=0;
308 useragent_count=0;
309 DeniedReportLimit=10;
310 AuthfailReportLimit=10;
311 DansGuardianReportLimit=10;
312 SquidGuardReportLimit=10;
313 DownloadReportLimit=50;
314 UserReportLimit=0;
315 debug=0;
316 debugz=0;
317 debugm=0;
318 iprel=0;
319 userip=0;
320 color1=0;
321 color2=0;
322 color3=0;
323 realt=0;
324 realtime_refresh=3;
325 realtime_access_log_lines=1000;
326 cost=0.01;
327 nocost=50000000;
328 ndownload=0;
329 squid24=0;
330
331 bzero(IncludeUsers, sizeof(IncludeUsers));
332 bzero(ExcludeString, sizeof(ExcludeString));
333 first_user_file=NULL;
334
335 NAccessLog=0;
336 for(x=0; x<MAXLOGS; x++)
337 AccessLog[x][0]='\0';
338 AccessLogFromCmdLine=0;
339
340 strcpy(Title,_("Squid User Access Report"));
341
342 while((ch = getopt(argc, argv, "a:b:c:d:e:f:g:u:l:L:o:s:t:w:hijmnprvxyz")) != -1){
343 switch(ch)
344 {
345 case 'a':
346 strcpy(addr,optarg);
347 break;
348 case 'b':
349 strcpy(uagent,optarg);
350 break;
351 case 'c':
352 strcpy(hexclude,optarg);
353 break;
354 case 'd':
355 strncpy(date,optarg,sizeof(date)-1);
356 date[sizeof(date)-1]='\0';
357 getword_start(&gwarea,optarg);
358 if (getword(cdfrom,sizeof(cdfrom),&gwarea,'-')<0 || getword(cduntil,sizeof(cduntil),&gwarea,0)<0) {
359 printf(_("SARG: The date range requested on the command line by option -d is invalid.\n"));
360 exit(1);
361 }
362 date_from(date, cdfrom, cduntil);
363 dfrom=atoi(cdfrom);
364 duntil=atoi(cduntil);
365 break;
366 case 'e':
367 strcpy(email,optarg);
368 break;
369 case 'f':
370 strcpy(ConfigFile,optarg);
371 break;
372 case 'g':
373 strcpy(df,optarg);
374 break;
375 case 'h':
376 usage(argv[0]);
377 exit(0);
378 break;
379 case 'i':
380 iprel++;
381 break;
382 case 'l':
383 if (NAccessLog>=MAXLOGS) {
384 printf(_("SARG: Too many log files passed on command line with option -l.\n"));
385 exit(1);
386 }
387 strcpy(AccessLog[NAccessLog],optarg);
388 NAccessLog++;
389 AccessLogFromCmdLine++;
390 break;
391 case 'L':
392 strcpy(SquidGuardLogAlternate,optarg);
393 break;
394 case 'm':
395 debugm++;
396 break;
397 case 'n':
398 dns++;
399 break;
400 case 'o':
401 strcpy(outdir,optarg);
402 break;
403 case 'p':
404 userip++;
405 break;
406 case 'r':
407 realt++;
408 break;
409 case 's':
410 strcpy(site,optarg);
411 break;
412 case 't':
413 {
414 int h,m;
415
416 if(strstr(optarg,"-") == 0) {
417 strcpy(hm,optarg);
418 strcpy(hmf,optarg);
419 } else {
420 getword_start(&gwarea,optarg);
421 if (getword(hm,sizeof(hm),&gwarea,'-')<0 || getword(hmf,sizeof(hmf),&gwarea,0)<0) {
422 debuga(_("The time range passed on the command line with option -t is invalid"));
423 exit(1);
424 }
425 }
426 if(sscanf(hm,"%d:%d",&h,&m)!=2) {
427 debuga(_("Time period must be MM or MM:SS. Exit"));
428 exit(1);
429 }
430 sprintf(hm,"%02d%02d",h,m);
431 if(sscanf(hmf,"%d:%d",&h,&m)!=2) {
432 debuga(_("Time period must be MM or MM:SS. Exit"));
433 exit(1);
434 }
435 sprintf(hmf,"%02d%02d",h,m);
436 break;
437 }
438 case 'u':
439 strcpy(us,optarg);
440 break;
441 case 'v':
442 version();
443 break;
444 case 'w':
445 strcpy(tmp,optarg);
446 break;
447 case 'x':
448 debug++;
449 break;
450 case 'y':
451 langcode++;
452 break;
453 case 'z':
454 debugz++;
455 break;
456 case ':':
457 debuga(_("Option -%c require an argument"),optopt);
458 errflg++;
459 break;
460 case '?':
461 usage(argv[0]);
462 exit(1);
463 break;
464 }
465
466 }
467
468 if (errflg) {
469 usage(argv[0]);
470 exit(2);
471 }
472
473 if(debug) debuga(_("Init"));
474
475 if(ConfigFile[0] == '\0') sprintf(ConfigFile,"%s/sarg.conf",SYSCONFDIR);
476 if(access(ConfigFile, R_OK) != 0) {
477 debuga(_("Cannot open config file: %s - %s"),ConfigFile,strerror(errno));
478 exit(1);
479 }
480
481 if(access(ConfigFile, R_OK) == 0)
482 getconf();
483
484 if(UserIp) userip++;
485
486 if(dns) Ip2Name=1;
487
488 if(realt) {
489 realtime();
490 exit(0);
491 }
492
493 if(IndexTree == INDEX_TREE_FILE)
494 strcpy(ImageFile,"../images");
495 else
496 strcpy(ImageFile,"../../../images");
497
498 dataonly=0;
499 if(DataFile[0] != '\0')
500 dataonly++;
501
502 if(!NAccessLog) {
503 strcpy(AccessLog[0],"/var/log/squid/access.log");
504 NAccessLog++;
505 }
506
507 if(strcmp(hexclude,"onvert") == 0 && strcmp(site,"plit") != 0) {
508 convlog(AccessLog[0], df, dfrom, duntil);
509 exit(0);
510 }
511
512 if(strcmp(site,"plit") == 0) {
513 splitlog(AccessLog[0], df, dfrom, duntil, hexclude);
514 exit(0);
515 }
516
517 load_excludecodes(ExcludeCodes);
518
519 if(access(PasswdFile, R_OK) == 0) {
520 getusers(PasswdFile,debug);
521 puser++;
522 }
523
524 if(hexclude[0] == '\0')
525 strcpy(hexclude,ExcludeHosts);
526 if(hexclude[0] != '\0') {
527 gethexclude(hexclude,debug);
528 fhost++;
529 }
530
531 if(ReportType == 0) {
532 ReportType=REPORT_TYPE_TOPUSERS | REPORT_TYPE_TOPSITES | REPORT_TYPE_USERS_SITES |
533 REPORT_TYPE_SITES_USERS | REPORT_TYPE_DATE_TIME | REPORT_TYPE_DENIED |
534 REPORT_TYPE_AUTH_FAILURES | REPORT_TYPE_SITE_USER_TIME_DATE | REPORT_TYPE_DOWNLOADS;
535 }
536
537 if(access(ExcludeUsers, R_OK) == 0) {
538 getuexclude(ExcludeUsers,debug);
539 fuser++;
540 }
541
542 indexonly=0;
543 if(fuser) {
544 if(is_indexonly())
545 indexonly++;
546 }
547 if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
548 if(Index == INDEX_ONLY) indexonly++;
549
550 if(MaxElapsed[0] != '\0') max_elapsed=atol(MaxElapsed);
551
552 if(outdir[0] == '\0') strcpy(outdir,OutputDir);
553 strcat(outdir,"/");
554
555 if(uagent[0] == '\0') strcpy(uagent,UserAgentLog);
556
557 if(tmp[0] == '\0') strcpy(tmp,TempDir);
558 else strcpy(TempDir,tmp);
559
560 if(df[0] == '\0') strcpy(df,DateFormat);
561 else strcpy(DateFormat,df);
562
563 if(df[0] == '\0') {
564 strcpy(df,"u");
565 strcpy(DateFormat,"u");
566 }
567
568 if(email[0] == '\0' && OutputEmail[0] != '\0') strcpy(email,OutputEmail);
569
570 strcpy(tmp2,tmp);
571
572 if(email[0] != '\0') {
573 sprintf(wtemp2,"%s/sarg",tmp2);
574 my_mkdir(wtemp2);
575 strcat(tmp2,"/sarg");
576 strcpy(outdir,tmp2);
577 strcat(outdir,"/");
578 }
579
580 strcat(tmp2,"/sarg.log");
581
582 sprintf(tmp3,"%s/sarg",tmp);
583 if(access(tmp3, R_OK) == 0) {
584 unlinkdir(tmp3,1);
585 }
586 my_mkdir(tmp3);
587 strcpy(denied_unsort,tmp3);
588 strcpy(denied_sort,tmp3);
589 strcpy(authfail_unsort,tmp3);
590 strcat(denied_unsort,"/denied.log.unsort");
591 strcat(denied_sort,"/denied.log");
592 strcat(authfail_unsort,"/authfail.log.unsort");
593
594 if(debug) {
595 fprintf(stderr, _("SARG: Parameters:\nSARG:\n"));
596 fprintf(stderr, _("SARG: Hostname or IP address (-a) = %s\n"),addr);
597 fprintf(stderr, _("SARG: Useragent log (-b) = %s\n"),uagent);
598 fprintf(stderr, _("SARG: Exclude file (-c) = %s\n"),hexclude);
599 fprintf(stderr, _("SARG: Date from-until (-d) = %s\n"),date);
600 fprintf(stderr, _("SARG: Email address to send reports (-e) = %s\n"),email);
601 fprintf(stderr, _("SARG: Config file (-f) = %s\n"),ConfigFile);
602 if(strcmp(df,"e") == 0)
603 fprintf(stderr, _("SARG: Date format (-g) = Europe (dd/mm/yyyy)\n"));
604 if(strcmp(df,"u") == 0)
605 fprintf(stderr, _("SARG: Date format (-g) = USA (mm/dd/yyyy)\n"));
606 if(strcmp(df,"w") == 0)
607 fprintf(stderr, _("SARG: Date format (-g) = Sites & Users (yyyy/ww)\n"));
608 fprintf(stderr, _("SARG: IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
609 for (iarq=0 ; iarq<NAccessLog ; iarq++)
610 fprintf(stderr, _("SARG: Input log (-l) = %s\n"),AccessLog[iarq]);
611 fprintf(stderr, _("SARG: Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
612 fprintf(stderr, _("SARG: Output dir (-o) = %s\n"),outdir);
613 fprintf(stderr, _("SARG: Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
614 fprintf(stderr, _("SARG: Accessed site (-s) = %s\n"),site);
615 fprintf(stderr, _("SARG: Time (-t) = %s\n"),hm);
616 fprintf(stderr, _("SARG: User (-u) = %s\n"),us);
617 fprintf(stderr, _("SARG: Temporary dir (-w) = %s\n"),tmp);
618 fprintf(stderr, _("SARG: Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
619 fprintf(stderr, _("SARG: Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
620 fprintf(stderr, _("SARG:\n"));
621 }
622
623 if(debugm) {
624 printf(_("Parameters:\n"));
625 printf(_(" Hostname or IP address (-a) = %s\n"),addr);
626 printf(_(" Useragent log (-b) = %s\n"),uagent);
627 printf(_(" Exclude file (-c) = %s\n"),hexclude);
628 printf(_(" Date from-until (-d) = %s\n"),date);
629 printf(_(" Email address to send reports (-e) = %s\n"),email);
630 printf(_(" Config file (-f) = %s\n"),ConfigFile);
631 if(strcmp(df,"e") == 0)
632 printf(_(" Date format (-g) = Europe (dd/mm/yyyy)\n"));
633 if(strcmp(df,"u") == 0)
634 printf(_(" Date format (-g) = USA (mm/dd/yyyy)\n"));
635 if(strcmp(df,"w") == 0)
636 printf(_(" Date format (-g) = Sites & Users (yyyy/ww)\n"));
637 printf(_(" IP report (-i) = %s\n"),(iprel) ? _("Yes") : _("No"));
638 for (iarq=0 ; iarq<NAccessLog ; iarq++)
639 printf(_(" Input log (-l) = %s\n"),AccessLog[iarq]);
640 printf(_(" Resolve IP Address (-n) = %s\n"),(Ip2Name) ? _("Yes") : _("No"));
641 printf(_(" Output dir (-o) = %s\n"),outdir);
642 printf(_("Use Ip Address instead of userid (-p) = %s\n"),(UserIp) ? _("Yes") : _("No"));
643 printf(_(" Accessed site (-s) = %s\n"),site);
644 printf(_(" Time (-t) = %s\n"),hm);
645 printf(_(" User (-u) = %s\n"),us);
646 printf(_(" Temporary dir (-w) = %s\n"),tmp);
647 printf(_(" Debug messages (-x) = %s\n"),(debug) ? _("Yes") : _("No"));
648 printf(_(" Process messages (-z) = %s\n"),(debugz) ? _("Yes") : _("No"));
649 printf(_("sarg version: %s\n"),VERSION);
650 }
651
652 if(debug)
653 debuga(_("sarg version: %s"),VERSION);
654
655 maxopenfiles=MAX_OPEN_USER_FILES;
656 #ifdef HAVE_RLIM_T
657 if (Ulimit[0] != '\0') {
658 struct rlimit rl;
659 long l1, l2;
660 int rc=0;
661
662 #if defined(RLIMIT_NOFILE)
663 getrlimit (RLIMIT_NOFILE, &rl);
664 #elif defined(RLIMIT_OFILE)
665 getrlimit (RLIMIT_OFILE, &rl);
666 #else
667 #warning "No rlimit resource for the number of open files"
668 #endif
669 l1 = rl.rlim_cur;
670 l2 = rl.rlim_max;
671
672 rl.rlim_cur = atol(Ulimit);
673 rl.rlim_max = atol(Ulimit);
674 #if defined(RLIMIT_NOFILE)
675 rc=setrlimit (RLIMIT_NOFILE, &rl);
676 #elif defined(RLIMIT_OFILE)
677 rc=setrlimit (RLIMIT_OFILE, &rl);
678 #else
679 #warning "No rlimit resource for the number of open files"
680 #endif
681 if(rc == -1) {
682 debuga(_("setrlimit error - %s\n"),strerror(errno));
683 }
684
685 if(debug)
686 debuga("Maximum file descriptor: cur=%ld max=%ld, changed to cur="RLIM_STRING" max="RLIM_STRING,l1,l2,rl.rlim_cur,rl.rlim_max);
687 }
688 #endif
689
690 init_usertab(UserTabFile);
691
692 if (longline_prepare(&line)<0) {
693 debuga(_("Not enough memory to read a log file"));
694 exit(1);
695 }
696
697 sprintf ( sz_Download_Unsort , "%s/sarg/download.unsort", tmp);
698
699 if(DataFile[0]=='\0') {
700 if((ReportType & REPORT_TYPE_DENIED) != 0) {
701 if((fp_denied=MY_FOPEN(denied_unsort,"w"))==NULL) {
702 debuga(_("SARG: (log) Cannot open file: %s - %s"),denied_unsort,strerror(errno));
703 exit(1);
704 }
705 }
706
707 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
708 if((fp_authfail=MY_FOPEN(authfail_unsort,"w"))==NULL) {
709 debuga(_("SARG: (log) Cannot open file: %s - %s"),authfail_unsort,strerror(errno));
710 exit(1);
711 }
712 }
713 }
714
715 for (iarq=0 ; iarq<NAccessLog ; iarq++) {
716 strcpy(arq,AccessLog[iarq]);
717
718 strcpy(arqtt,arq);
719
720 if(strcmp(arq,"-")==0) {
721 if(debug)
722 debuga(_("Reading access log file: from stdin"));
723 fp_in=stdin;
724 from_stdin=1;
725 } else {
726 decomp(arq,zip,tmp);
727 if(debug)
728 debuga(_("Reading access log file: %s"),arq);
729 if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
730 debuga(_("SARG: (log) Cannot open log file: %s - %s"),arq,strerror(errno));
731 exit(1);
732 }
733 from_stdin=0;
734 }
735 ilf=ILF_Unknown;
736 download_flag=0;
737 // pre-read the file only if we have to show stats
738 if(ShowReadStatistics && !from_stdin) {
739 size_t nread,i;
740 int skipcr=0;
741
742 recs1=0UL;
743 recs2=0UL;
744
745 while ((nread=fread(line.buffer,1,line.size,fp_in))>0) {
746 for (i=0 ; i<nread ; i++)
747 if (skipcr) {
748 if (line.buffer[i]!='\n' && line.buffer[i]!='\r') {
749 skipcr=0;
750 }
751 } else {
752 if (line.buffer[i]=='\n' || line.buffer[i]=='\r') {
753 skipcr=1;
754 recs1++;
755 }
756 }
757 }
758 rewind(fp_in);
759 printf(_("SARG: Records in file: %lu, reading: %3.2f%%\r"),recs1,(float) 0);
760 fflush( stdout ) ;
761 }
762
763 line.start=0;
764 line.end=0;
765 line.length=0;
766
767 while ((linebuf=longline_read(fp_in,&line))!=NULL) {
768 blen=strlen(linebuf);
769
770 if (ilf==ILF_Unknown) {
771 if(strncmp(linebuf,"#Software: Mic",14) == 0) {
772 fixendofline(linebuf);
773 if (debug)
774 debuga(_("Log is from Microsoft ISA: %s"),linebuf);
775 ilf=ILF_Isa;
776 ilf_count[ilf]++;
777 continue;
778 }
779
780 if(strncmp(linebuf,"*** SARG Log ***",16) == 0) {
781 getword_start(&gwarea,arqtt);
782 if (getword_skip(2000,&gwarea,'-')<0 || getword(val2,sizeof(val2),&gwarea,'_')<0 ||
783 getword_skip(10,&gwarea,'-')<0 || getword(val3,sizeof(val3),&gwarea,'_')<0) {
784 printf("SARG: The name of the file is invalid: %s\n",arq);
785 exit(1);
786 }
787 sprintf(period,"%s-%s",val2,val3);
788 ilf=ILF_Sarg;
789 ilf_count[ilf]++;
790 continue;
791 }
792 }
793
794 if(!fp_log && strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
795 if(access(ParsedOutputLog,R_OK) != 0) {
796 my_mkdir(ParsedOutputLog);
797 }
798 sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
799 if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
800 debuga(_("SARG: (log) Cannot open log file: %s - %s"),arq_log,strerror(errno));
801 exit(1);
802 }
803 fputs("*** SARG Log ***\n",fp_log);
804 }
805
806 recs2++;
807 if( ShowReadStatistics && !from_stdin && ! --OutputNonZero) {
808 perc = recs2 * 100 ;
809 perc = perc / recs1 ;
810 printf("SARG: Records in file: %lu, reading: %3.2f%%\r",recs1,perc);
811 fflush (stdout);
812 OutputNonZero = REPORT_EVERY_X_LINES ;
813 }
814 if(blen < 58) continue;
815 if(strstr(linebuf,"HTTP/0.0") != 0) continue;
816 if(strstr(linebuf,"logfile turned over") != 0) continue;
817 if(linebuf[0] == ' ') continue;
818
819 // Record only hours usage which is required
820 tt = (time_t) strtoul( linebuf, NULL, 10 );
821 t = localtime( &tt );
822
823 if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len,
824 sizeof( int ), compar ) == NULL )
825 continue;
826
827 if( bsearch( &( t -> tm_hour ), hours.list, hours.len,
828 sizeof( int ), compar ) == NULL )
829 continue;
830
831 // exclude_string
832 if(ExcludeString[0] != '\0') {
833 exstring=0;
834 getword_start(&gwarea,ExcludeString);
835 while(strchr(gwarea.current,':') != 0) {
836 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
837 debuga(_("Maybe you have a broken record or garbage in your exclusion string"));
838 exit(1);
839 }
840 if((str=(char *) strstr(linebuf,val1)) != (char *) NULL )
841 exstring++;
842 }
843 if((str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
844 exstring++;
845 if(exstring) continue;
846 }
847
848 totregsl++;
849 if(debugm)
850 printf("BUF=%s\n",linebuf);
851
852 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
853 getword_start(&gwarea,linebuf);
854 if (getword(data,sizeof(data),&gwarea,' ')<0) {
855 printf("SARG: Maybe you have a broken time in your access.log file.\n");
856 exit(1);
857 }
858 if((str=(char *) strchr(data, '.')) != (char *) NULL ) {
859 if((str=(char *) strchr(str+1, '.')) != (char *) NULL ) {
860 strcpy(ip,data);
861 strcpy(elap,"0");
862 if(squid24) {
863 if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) {
864 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
865 exit(1);
866 }
867 } else {
868 if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
869 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
870 exit(1);
871 }
872 }
873 if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 ||
874 getword(fun,sizeof(fun),&gwarea,' ')<0) {
875 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
876 exit(1);
877 }
878 if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
879 debuga(_("Maybe you have a broken url in your %s file"),arq);
880 exit(1);
881 }
882 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(code2,sizeof(code2),&gwarea,' ')<0 ||
883 getword(tam,sizeof(tam),&gwarea,' ')<0) {
884 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
885 exit(1);
886 }
887 if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) {
888 if (getword(code,sizeof(code),&gwarea,' ')<0) {
889 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
890 exit(1);
891 }
892 } else {
893 if (getword(code,sizeof(code),&gwarea,'\0')<0) {
894 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
895 exit(1);
896 }
897 }
898
899 if ((str = strchr(code, ':')) != NULL)
900 *str = '/';
901
902 if(strcmp(tam,"\0") == 0)
903 strcpy(tam,"0");
904
905 ilf=ILF_Common;
906 ilf_count[ilf]++;
907 }
908 }
909
910 if(ilf==ILF_Unknown || ilf==ILF_Squid) {
911 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
912 printf("SARG: Maybe you have a broken elapsed time in your %s file.\n",arq);
913 exit(1);
914 }
915 while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0')
916 if (getword(elap,sizeof(elap),&gwarea,' ')<0) {
917 printf("SARG: Maybe you have a broken elapsed time in your %s file.\n",arq);
918 exit(1);
919 }
920 if(strlen(elap) < 1) continue;
921 if (getword(ip,sizeof(ip),&gwarea,' ')<0){
922 printf("SARG: Maybe you have a broken client IP address in your %s file.\n",arq);
923 exit(1);
924 }
925 if (getword(code,sizeof(code),&gwarea,' ')<0){
926 printf("SARG: Maybe you have a broken result code in your %s file.\n",arq);
927 exit(1);
928 }
929 if (getword(tam,sizeof(tam),&gwarea,' ')<0){
930 printf("SARG: Maybe you have a broken amount of data in your %s file.\n",arq);
931 exit(1);
932 }
933 if (getword(fun,sizeof(fun),&gwarea,' ')<0){
934 printf("SARG: Maybe you have a broken request method in your %s file.\n",arq);
935 exit(1);
936 }
937 if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
938 debuga(_("Maybe you have a broken url in your %s file"),arq);
939 exit(1);
940 }
941 if (getword(user,sizeof(user),&gwarea,' ')<0){
942 printf("SARG: Maybe you have a broken user ID in your %s file.\n",arq);
943 exit(1);
944 }
945 ilf=ILF_Squid;
946 ilf_count[ilf]++;
947 }
948 }
949 if (ilf==ILF_Sarg) {
950 getword_start(&gwarea,linebuf);
951 if (getword(data,sizeof(data),&gwarea,'\t')<0){
952 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
953 exit(1);
954 }
955 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
956 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
957 exit(1);
958 }
959 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
960 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
961 exit(1);
962 }
963 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
964 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
965 exit(1);
966 }
967 if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
968 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
969 exit(1);
970 }
971 if (getword(tam,sizeof(tam),&gwarea,'\t')<0){
972 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
973 exit(1);
974 }
975 if (getword(code,sizeof(code),&gwarea,'\t')<0){
976 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
977 exit(1);
978 }
979 if (getword(elap,sizeof(elap),&gwarea,'\t')<0){
980 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
981 exit(1);
982 }
983 if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){
984 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
985 exit(1);
986 }
987 }
988 if (ilf==ILF_Isa) {
989 if (linebuf[0] == '#') {
990 int ncols,cols[ISACOL_Last];
991
992 fixendofline(linebuf);
993 getword_start(&gwarea,linebuf);
994 // remove the #Fields: column at the beginning of the line
995 if (getword_skip(1000,&gwarea,' ')<0){
996 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
997 exit(1);
998 }
999 for (ncols=0 ; ncols<ISACOL_Last ; ncols++) cols[ncols]=-1;
1000 ncols=0;
1001 while(gwarea.current[0] != '\0') {
1002 if (getword(val1,sizeof(val1),&gwarea,'\t')<0){
1003 printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
1004 exit(1);
1005 }
1006 if(strcmp(val1,"c-ip") == 0) cols[ISACOL_Ip]=ncols;
1007 if(strcmp(val1,"cs-username") == 0) cols[ISACOL_UserName]=ncols;
1008 if(strcmp(val1,"date") == 0) cols[ISACOL_Date]=ncols;
1009 if(strcmp(val1,"time") == 0) cols[ISACOL_Time]=ncols;
1010 if(strcmp(val1,"time-taken") == 0) cols[ISACOL_TimeTaken]=ncols;
1011 if(strcmp(val1,"sc-bytes") == 0) cols[ISACOL_Bytes]=ncols;
1012 if(strcmp(val1,"cs-uri") == 0) cols[ISACOL_Uri]=ncols;
1013 if(strcmp(val1,"sc-status") == 0) cols[ISACOL_Status]=ncols;
1014 ncols++;
1015 }
1016 if (cols[ISACOL_Ip]>=0) {
1017 isa_ncols=ncols;
1018 for (ncols=0 ; ncols<ISACOL_Last ; ncols++)
1019 isa_cols[ncols]=cols[ncols];
1020 }
1021 continue;
1022 }
1023 if (!isa_ncols) continue;
1024 getword_start(&gwarea,linebuf);
1025 for (x=0 ; x<isa_ncols ; x++) {
1026 if (getword_ptr(linebuf,&str,&gwarea,'\t')<0) {
1027 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
1028 exit(1);
1029 }
1030 if (x==isa_cols[ISACOL_Ip]) {
1031 if (strlen(str)>=sizeof(ip)) {
1032 debuga(_("Maybe you have a broken IP in your %s file"),arq);
1033 exit(1);
1034 }
1035 strcpy(ip,str);
1036 } else if (x==isa_cols[ISACOL_UserName]) {
1037 if (strlen(str)>=sizeof(user)) {
1038 debuga(_("Maybe you have a broken user in your %s file"),arq);
1039 exit(1);
1040 }
1041 strcpy(user,str);
1042 } else if (x==isa_cols[ISACOL_Date]) {
1043 if (strlen(str)>=sizeof(data)) {
1044 debuga(_("Maybe you have a broken date in your %s file"),arq);
1045 exit(1);
1046 }
1047 strcpy(data,str);
1048 } else if (x==isa_cols[ISACOL_Time]) {
1049 if (strlen(str)>=sizeof(hora)) {
1050 debuga(_("Maybe you have a broken time in your %s file"),arq);
1051 exit(1);
1052 }
1053 strcpy(hora,str);
1054 } else if (x==isa_cols[ISACOL_TimeTaken]) {
1055 if (strlen(str)>=sizeof(elap)) {
1056 debuga(_("Maybe you have a broken download duration in your %s file"),arq);
1057 exit(1);
1058 }
1059 strcpy(elap,str);
1060 } else if (x==isa_cols[ISACOL_Bytes]) {
1061 if (strlen(str)>=sizeof(tam)) {
1062 debuga(_("Maybe you have a broken download size in your %s file"),arq);
1063 exit(1);
1064 }
1065 strcpy(tam,str);
1066 } else if (x==isa_cols[ISACOL_Uri]) {
1067 url=str;
1068 } else if (x==isa_cols[ISACOL_Status]) {
1069 if (strlen(str)>=sizeof(code)) {
1070 debuga(_("Maybe you have a broken access code in your %s file"),arq);
1071 exit(1);
1072 }
1073 strcpy(code,str);
1074 }
1075 }
1076
1077 if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) {
1078 sprintf(val1,"DENIED/%s",code);
1079 strcpy(code,val1);
1080 }
1081 getword_start(&gwarea,data);
1082 if (getword(ano,sizeof(ano),&gwarea,'-')<0){
1083 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
1084 exit(1);
1085 }
1086 if (getword(mes,sizeof(mes),&gwarea,'-')<0){
1087 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
1088 exit(1);
1089 }
1090 if (getword(dia,sizeof(dia),&gwarea,'\0')<0){
1091 debuga(_("Maybe you have a broken record or garbage in your %s file"),arq);
1092 exit(1);
1093 }
1094 conv_month_name(mes);
1095 sprintf(data," %s/%s/%s:%s",dia,mes,ano,hora);
1096 }
1097
1098 if(ilf==ILF_Squid) {
1099 tt=atoi(data);
1100 t=localtime(&tt);
1101
1102 strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
1103 sprintf(mes,"%d",t->tm_mon+1);
1104 conv_month_name(mes);
1105 if(strncmp(df,"u",1) == 0)
1106 sprintf(tbuf, "%04d%s%02d", t->tm_year+1900, mes, t->tm_mday);
1107 if(strncmp(df,"e",1) == 0)
1108 sprintf(tbuf, "%02d%s%04d", t->tm_mday, mes, t->tm_year+1900);
1109 if(strncmp(df,"w",1) == 0) {
1110 IndexTree=INDEX_TREE_FILE;
1111 strftime(tbuf, sizeof(tbuf), "%Y.%U", t);
1112 }
1113
1114 strftime(wdata, sizeof(wdata), "%Y%m%d", t);
1115 idata=atoi(wdata);
1116
1117 if(strncmp(df,"u",1)==0)
1118 strftime(dia, sizeof(dia), "%m/%d/%Y", t);
1119 else
1120 strftime(dia, sizeof(dia), "%d/%m/%Y", t);
1121 sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
1122 } else if(ilf==ILF_Common || ilf==ILF_Isa) {
1123 getword_start(&gwarea,data+1);
1124 if (getword_multisep(data,sizeof(data),&gwarea,':')<0){
1125 debuga(_("Maybe you have a broken date in your %s file"),arq);
1126 exit(1);
1127 }
1128 if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){
1129 debuga(_("Maybe you have a broken date in your %s file"),arq);
1130 exit(1);
1131 }
1132 getword_start(&gwarea,data);
1133 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1134 debuga(_("Maybe you have a broken date in your %s file"),arq);
1135 exit(1);
1136 }
1137 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1138 debuga(_("Maybe you have a broken date in your %s file"),arq);
1139 exit(1);
1140 }
1141 if (getword(ano,sizeof(ano),&gwarea,'/')<0){
1142 debuga(_("Maybe you have a broken date in your %s file"),arq);
1143 exit(1);
1144 }
1145
1146 if(strcmp(df,"u") == 0)
1147 snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia);
1148 if(strcmp(df,"e") == 0)
1149 snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano);
1150 builddia(dia,mes,ano,df,wdata);
1151 idata=atoi(wdata);
1152 } else if (ilf==ILF_Sarg) {
1153 getword_start(&gwarea,data);
1154 if(strcmp(df,"u") == 0) {
1155 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1156 debuga(_("Maybe you have a broken date in your %s file"),arq);
1157 exit(1);
1158 }
1159 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1160 debuga(_("Maybe you have a broken date in your %s file"),arq);
1161 exit(1);
1162 }
1163 } else {
1164 if (getword(dia,sizeof(dia),&gwarea,'/')<0){
1165 debuga(_("Maybe you have a broken date in your %s file"),arq);
1166 exit(1);
1167 }
1168 if (getword(mes,sizeof(mes),&gwarea,'/')<0){
1169 debuga(_("Maybe you have a broken date in your %s file"),arq);
1170 exit(1);
1171 }
1172 }
1173 if (getword(ano,sizeof(ano),&gwarea,0)<0){
1174 debuga(_("Maybe you have a broken date in your %s file"),arq);
1175 exit(1);
1176 }
1177 snprintf(wdata,9,"%s%s%s",ano,mes,dia);
1178 idata=atoi(wdata);
1179 }
1180
1181 if(debugm)
1182 printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil);
1183
1184 if(date[0] != '\0'){
1185 if(idata < dfrom || idata > duntil) continue;
1186 }
1187
1188 if(strlen(user) > MAX_USER_LEN) {
1189 if (debugm) printf(_("User ID too long: %s\n"),user);
1190 totregsx++;
1191 continue;
1192 }
1193
1194 // include_users
1195 if(IncludeUsers[0] != '\0') {
1196 sprintf(val1,":%s:",user);
1197 if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL )
1198 continue;
1199 }
1200
1201 if(vercode(code)) {
1202 if (debugm) printf(_("Excluded code: %s\n"),code);
1203 totregsx++;
1204 continue;
1205 }
1206
1207 if(testvaliduserchar(user))
1208 continue;
1209
1210 #if 0
1211 if((str = strstr(user,"%20")) != NULL) {
1212 /*
1213 This is a patch introduced to solve bug #1624251 reported at sourceforge but
1214 the side effect is to truncate the name at the first space and merge the reports
1215 of people whose name is identical up to the first space.
1216
1217 The old code used to truncate the user name at the first % if a %20 was
1218 found anywhere in the string. That means the string could be truncated
1219 at the wrong place if another % occurred before the %20. This new code should
1220 avoid that problem and only truncate at the space. There is no bug
1221 report indicating that anybody noticed this.
1222 */
1223 *str='\0';
1224 }
1225
1226 /*
1227 Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was
1228 found in the user name.
1229 */
1230 while((str = strstr(user,"%5c")) != NULL) {
1231 *str='.';
1232 for (x=3 ; str[x] ; x++) str[x-2]=str[x];
1233 }
1234 #endif
1235
1236 urly=url;
1237
1238 if(ilf!=ILF_Sarg) {
1239 /*
1240 The full URL is not saved in sarg log. There is no point in testing the URL to detect
1241 a downloaded file.
1242 */
1243 download_flag=is_download_suffix(url);
1244 if (download_flag) {
1245 download_url=url;
1246 download_count++;
1247 }
1248 } else
1249 download_flag=0;
1250
1251 // remove any protocol:// at the beginning of the URL
1252 if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
1253 int i;
1254
1255 str+=2;
1256 for (i=0 ; str[i] ; i++)
1257 url[i]=str[i];
1258 url[i]='\0';
1259 }
1260
1261 if(!LongUrl) {
1262 url_hostname(url,hostname,sizeof(hostname));
1263 url=hostname;
1264 }
1265
1266 if(url[0] == '\0') continue;
1267
1268 if(addr[0] != '\0'){
1269 if(strcmp(addr,ip)!=0) continue;
1270 }
1271 if(fhost) {
1272 if(!vhexclude(url)) {
1273 if (debugm) printf(_("Excluded site: %s\n"),url);
1274 totregsx++;
1275 continue;
1276 }
1277 }
1278
1279 if(hm[0] != '\0') {
1280 hmr[0]='\0';
1281 chm++;
1282 getword_start(&gwarea,hora);
1283 while(chm) {
1284 if (getword_multisep(warea,sizeof(warea),&gwarea,':')<0){
1285 debuga(_("Maybe you have a broken time in your %s file"),arq);
1286 exit(1);
1287 }
1288 strncat(hmr,warea,2);
1289 chm--;
1290 }
1291 strncat(hmr,gwarea.current,2);
1292
1293 if(atoi(hmr) < atoi(hm) || atoi(hmr) > atoi(hmf)) continue;
1294 }
1295
1296 if(site[0] != '\0'){
1297 if(strstr(url,site)==0) continue;
1298 }
1299
1300 if(userip) {
1301 strcpy(user,ip);
1302 id_is_ip=1;
1303 } else {
1304 id_is_ip=0;
1305 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
1306 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
1307 strcpy(user,ip);
1308 id_is_ip=1;
1309 }
1310 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
1311 continue;
1312 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
1313 strcpy(user,"everybody");
1314 } else {
1315 strlow(user);
1316 if(NtlmUserFormat == NTLMUSERFORMAT_USER) {
1317 if((str = strchr(user,'_')) != 0) {
1318 strcpy(warea,str+1);
1319 strcpy(user,warea);
1320 }
1321 if((str = strchr(user,'+')) != 0) {
1322 strcpy(warea,str+1);
1323 strcpy(user,warea);
1324 }
1325 }
1326 }
1327 }
1328
1329 if(us[0] != '\0'){
1330 if(strcmp(user,us)!=0) continue;
1331 }
1332
1333 if(puser) {
1334 sprintf(wuser,":%s:",user);
1335 if(strstr(userfile, wuser) == 0)
1336 continue;
1337 }
1338
1339 if(fuser) {
1340 if(!vuexclude(user)) {
1341 if (debugm) printf(_("Excluded user: %s\n"),user);
1342 totregsx++;
1343 continue;
1344 }
1345 }
1346
1347 if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0)
1348 continue;
1349
1350 if(max_elapsed) {
1351 if(atol(elap)>max_elapsed) {
1352 elap[0]='0';
1353 elap[1]='\0';
1354 }
1355 }
1356
1357 if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
1358 fixendofline(str);
1359 sprintf(smartfilter,"\"%s\"",str+1);
1360 } else sprintf(smartfilter,"\"\"");
1361
1362 nopen=0;
1363 prev_ufile=NULL;
1364 for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) {
1365 prev_ufile=ufile;
1366 if (ufile->file) nopen++;
1367 }
1368 if (!ufile) {
1369 ufile=malloc(sizeof(*ufile));
1370 if (!ufile) {
1371 debuga(_("Not enough memory to store the user %s"),user);
1372 exit(1);
1373 }
1374 memset(ufile,0,sizeof(*ufile));
1375 ufile->next=first_user_file;
1376 first_user_file=ufile;
1377 uinfo=userinfo_create(user);
1378 ufile->user=uinfo;
1379 uinfo->id_is_ip=id_is_ip;
1380 } else {
1381 if (prev_ufile) {
1382 prev_ufile->next=ufile->next;
1383 ufile->next=first_user_file;
1384 first_user_file=ufile;
1385 }
1386 }
1387
1388 if (ufile->file==NULL) {
1389 if (nopen>=maxopenfiles) {
1390 x=0;
1391 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
1392 if (ufile1->file!=NULL) {
1393 if (x>=maxopenfiles) {
1394 fclose(ufile1->file);
1395 ufile1->file=NULL;
1396 }
1397 x++;
1398 }
1399 }
1400 }
1401 if (snprintf (tmp3, sizeof(tmp3), "%s/sarg/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
1402 debuga(_("Temporary user file name too long: %s/sarg/%s.unsort"), tmp, ufile->user->filename);
1403 exit(1);
1404 }
1405 if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
1406 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1407 exit (1);
1408 }
1409 }
1410
1411 /*if ( strcmp ( user , sz_Last_User ) != 0 ) {
1412 if ( fp_Write_User )
1413 fclose( fp_Write_User ) ;
1414 sprintf (tmp3, "%s/sarg/%s.unsort", tmp, user);
1415
1416 if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) {
1417 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1418 exit (1);
1419 }
1420 strcpy( sz_Last_User , user ) ;
1421 }*/
1422 fprintf(ufile->file, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,ip,url,tam,code,elap,smartfilter);
1423
1424 if(fp_log && ilf!=ILF_Sarg)
1425 fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,url,tam,code,elap,smartfilter);
1426
1427 totregsg++;
1428
1429 if(!dataonly && download_flag && download_url && strstr(code,"DENIED") == 0) {
1430 ndownload = 1;
1431
1432 if ( ! fp_Download_Unsort ) {
1433 if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) {
1434 fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno));
1435 exit (1);
1436 }
1437 }
1438 fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url);
1439 }
1440
1441 if((ReportType & REPORT_TYPE_DENIED) != 0 || (ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) {
1442 if(fp_denied && strstr(code,"DENIED/403") != 0) {
1443 fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1444 denied_count++;
1445 }
1446 if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) {
1447 fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,urly);
1448 authfail_count++;
1449 }
1450 }
1451
1452 if((!totper || idata<mindate) && ilf!=ILF_Sarg){
1453 totper++;
1454 mindate=idata;
1455 sprintf(period,"%s-",tbuf);
1456 strcpy(start_hour,tbuf2);
1457 if(date[0] != '\0')
1458 fixper(tbuf, period, cduntil);
1459 if(debugz){
1460 debugaz("tbuf",tbuf);
1461 debugaz("period",period);
1462 }
1463 }
1464
1465 if(debugm){
1466 printf("IP=\t%s\n",ip);
1467 printf("USER=\t%s\n",user);
1468 printf("ELAP=\t%s\n",elap);
1469 printf("DATE=\t%s\n",dia);
1470 printf("TIME=\t%s\n",hora);
1471 printf("FUNC=\t%s\n",fun);
1472 printf("URL=\t%s\n",url);
1473 printf("CODE=\t%s\n",code);
1474 printf("LEN=\t%s\n",tam);
1475 }
1476 }
1477 if (!from_stdin) {
1478 fclose(fp_in);
1479 if( ShowReadStatistics )
1480 printf("SARG: Records in file: %lu, reading: %3.2f%%\n",recs1, (float) 100 );
1481 }
1482 }
1483
1484 longline_free(&line);
1485 if ( fp_Download_Unsort )
1486 fclose (fp_Download_Unsort);
1487
1488 for (ufile=first_user_file ; ufile ; ufile=ufile1) {
1489 ufile1=ufile->next;
1490 if (ufile->file!=NULL) fclose(ufile->file);
1491 free(ufile);
1492 }
1493
1494 free_download();
1495 free_excludecodes();
1496 free_exclude();
1497
1498 if(debug) {
1499 int totalcount=0;
1500
1501 for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
1502
1503 debuga(" %s: %ld, %s: %ld, %s: %ld",_("Records read"),totregsl,_("written"),totregsg,_("excluded"),totregsx);
1504
1505 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
1506 debuga("%s",_("Log with mixed records format (squid and common log)"));
1507
1508 if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
1509 debuga("%s",_("Common log format"));
1510
1511 if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
1512 debuga("%s",_("Squid log format"));
1513
1514 if(ilf_count[ILF_Sarg]>0)
1515 debuga("%s",_("Sarg log format"));
1516
1517 if(totalcount==0) {
1518 if(!totregsg) {
1519 fprintf(stderr, "SARG: %s\n",_("No records found"));
1520 fprintf(stderr, "SARG: %s\n",_("End"));
1521 } else fprintf(stderr, "SARG: %s\n",_("Log with invalid format"));
1522 if(fp_denied)
1523 fclose(fp_denied);
1524 if(fp_authfail)
1525 fclose(fp_authfail);
1526 userinfo_free();
1527 if(userfile)
1528 free(userfile);
1529 close_usertab();
1530 unlink(denied_unsort);
1531 unlink(authfail_unsort);
1532 unlink(tmp3);
1533 exit(0);
1534 }
1535 }
1536
1537 if(!totregsg){
1538 fprintf(stderr, "SARG: %s\n",_("No records found"));
1539 fprintf(stderr, "SARG: %s\n",_("End"));
1540 // fclose(fp_ou);
1541 if(fp_denied)
1542 fclose(fp_denied);
1543 if(fp_authfail)
1544 fclose(fp_authfail);
1545 userinfo_free();
1546 if(userfile)
1547 free(userfile);
1548 close_usertab();
1549 exit(0);
1550 }
1551
1552 if(date[0] == '\0' && ilf_count[ILF_Sarg]==0) {
1553 strcat(period,tbuf);
1554 }
1555
1556 if(debugz){
1557 debugaz("data",dia);
1558 debugaz("tbuf",tbuf);
1559 debugaz("period",period);
1560 }
1561
1562 if(debug)
1563 debuga("%s: %s",_("Period"),period);
1564
1565 // fclose(fp_ou);
1566 if(fp_denied)
1567 fclose(fp_denied);
1568 if(fp_authfail)
1569 fclose(fp_authfail);
1570
1571 if(fp_log != NULL) {
1572 fclose(fp_log);
1573 strcpy(end_hour,tbuf2);
1574 getword_start(&gwarea,period);
1575 if (getword(val2,sizeof(val2),&gwarea,'-')<0){
1576 printf("SARG: Maybe you have a broken date range definition.\n");
1577 exit(1);
1578 }
1579 if (getword(val1,sizeof(val1),&gwarea,'\0')<0){
1580 printf("SARG: Maybe you have a broken date range definition.\n");
1581 exit(1);
1582 }
1583 sprintf(val4,"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour);
1584 if (rename(arq_log,val4)) {
1585 fprintf(stderr,"SARG: failed to rename %s to %s - %s\n",arq_log,val4,strerror(errno));
1586 } else {
1587 strcpy(arq_log,val4);
1588
1589 if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
1590 /*
1591 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
1592 necessary around the command name, put them in the configuration file.
1593 */
1594 sprintf(val1,"%s \"%s\"",ParsedOutputLogCompress,arq_log);
1595 cstatus=system(val1);
1596 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1597 fprintf(stderr, "SARG: command return status %d\n",WEXITSTATUS(cstatus));
1598 fprintf(stderr, "SARG: command: %s\n",val1);
1599 exit(1);
1600 }
1601 }
1602 }
1603 if(debug)
1604 debuga("%s %s",_("Sarg parsed log saved as"),arq_log);
1605 }
1606
1607 if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) {
1608 sprintf(csort,"sort -T \"%s\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort);
1609 cstatus=system(csort);
1610 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
1611 fprintf(stderr, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus));
1612 fprintf(stderr, "SARG: sort command: %s\n",csort);
1613 exit(1);
1614 }
1615 unlink(denied_unsort);
1616 }
1617
1618 sort_users_log(tmp, debug);
1619
1620 if(DataFile[0] != '\0')
1621 data_file(tmp);
1622 else
1623 gerarel();
1624
1625 unlink(tmp2);
1626 if((ReportType & REPORT_TYPE_DENIED) != 0)
1627 unlink(denied_sort);
1628
1629 if(zip[0] != '\0' && strcmp(zip,"zcat") !=0) {
1630 recomp(arq, zip);
1631 }
1632 // else unlink(arq);
1633
1634 if(strcmp(tmp,"/tmp") != 0) {
1635 unlinkdir(tmp,0);
1636 }
1637
1638 userinfo_free();
1639 if(userfile)
1640 free(userfile);
1641 close_usertab();
1642
1643 if(debug)
1644 debuga("%s",_("End"));
1645
1646 exit(0);
1647
1648 }
1649
1650
1651 static void getusers(const char *pwdfile, int debug)
1652 {
1653
1654 FILE *fp_usr;
1655 char buf[255];
1656 char *str;
1657 long int nreg=0;
1658
1659 if(debug)
1660 debuga("%s: %s",_("Loading password file from"),pwdfile);
1661
1662 if ((fp_usr = fopen(pwdfile, "r")) == NULL) {
1663 fprintf(stderr, "SARG: (getusers) %s: %s - %s\n",_("Cannot open file"),pwdfile,strerror(errno));
1664 exit(1);
1665 }
1666
1667 fseek(fp_usr, 0, SEEK_END);
1668 nreg = ftell(fp_usr);
1669 if (nreg<0) {
1670 printf("SARG: Cannot get the size of file %s",pwdfile);
1671 exit(1);
1672 }
1673 nreg = nreg+5000;
1674 fseek(fp_usr, 0, SEEK_SET);
1675
1676 if((userfile=(char *) malloc(nreg))==NULL){
1677 fprintf(stderr, "SARG: %s (%ld):\n",_("malloc error"),nreg);
1678 exit(1);
1679 }
1680
1681 bzero(userfile,nreg);
1682 strcpy(userfile,":");
1683
1684 while(fgets(buf,sizeof(buf),fp_usr)!=NULL) {
1685 str=strchr(buf,':');
1686 if (!str) {
1687 printf("SARG: You have an invalid user in your %s file.\n",pwdfile);
1688 exit(1);
1689 }
1690 str[1]=0;
1691 strcat(userfile,buf);
1692 }
1693
1694 fclose(fp_usr);
1695
1696 return;
1697 }