2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
//! The names of the redirector log files already given to read_log(). It is used to avoid reading the same file twice.
30 static char **files_done
= NULL
;
//! The number of names stored in files_done.
31 static int nfiles_done
= 0;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
=0;
35 //! The file containing the sorted entries.
36 static char redirector_sorted
[MAXLEN
]="";
//! Parse one line of a redirector log and write the extracted fields to the unsorted temporary file.
//!
//! When RedirectorLogFormat is not empty, the line is split according to the tags
//! (year, mon, day, hour, source, list, ip, user, url, end) and separators listed in
//! that option. Otherwise a fixed layout is read: a year-mon-day date, a time, a
//! source/list pair located after an opening parenthesis, the url, the client ip and the user.
//!
//! The url is reduced with process_url(). When RedirectorFilterOutDate is set, the date is
//! encoded as year*10000+mon*100+day and compared with dfrom and duntil. An empty user, or a
//! user made of a single dash or space, is handled according to RecordsWithoutUser. A user
//! created here with userinfo_create() is flagged with no_report.
//!
//! The record written is: user id, date (8 digits), hour, ip and url separated by tabs,
//! followed by source/list, or by whichever of source and list is not empty.
//!
//! \param fp_ou The file receiving the record.
//! \param buf The line read from the redirector log.
38 static void parse_log(FILE *fp_ou
,char *buf
)
40 char leks
[5], sep
[2], res
[MAXLEN
];
42 char source
[128], list
[128];
43 char full_url
[MAX_URL_LEN
];
45 char user
[MAX_USER_LEN
];
47 char userlabel
[MAX_USER_LEN
];
48 long long int lmon
, lday
, lyear
;
52 struct getwordstruct gwarea
;
53 struct getwordstruct gwarea1
;
54 struct userinfostruct
*uinfo
;
// gwarea scans the log line while gwarea1 scans the configured format.
56 getword_start(&gwarea
,buf
);
57 if(RedirectorLogFormat
[0] != '\0') {
58 getword_start(&gwarea1
,RedirectorLogFormat
);
60 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
61 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
// Each iteration reads one tag name and the column separator following it, until the end tag is met.
73 while(strcmp(leks
,"end") != 0) {
74 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
75 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
78 if (getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
79 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
82 if(strcmp(leks
,"end") != 0) {
// Cut the next column out of the log line at the first character of the separator.
83 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
84 debuga(_("Parsing of tag \"%s\" in redirector log %s returned no result\n"),leks
,wentp
);
// Dispatch on the tag name. The length checks compare the column with the size of its destination buffer.
88 if(strcmp(leks
,"year") == 0) {
90 } else if(strcmp(leks
,"mon") == 0) {
92 } else if(strcmp(leks
,"day") == 0) {
94 } else if(strcmp(leks
,"hour") == 0) {
95 if (strlen(res
)>=sizeof(hour
)) {
96 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
101 } else if(strcmp(leks
,"source") == 0) {
102 if (strlen(res
)>=sizeof(source
)) {
103 debuga(_("Banning source name too long in redirector log file %s\n"),wentp
);
108 } else if(strcmp(leks
,"list") == 0) {
109 if (strlen(res
)>=sizeof(list
)) {
110 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
115 } else if(strcmp(leks
,"ip") == 0) {
116 if (strlen(res
)>=sizeof(ip
)) {
117 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
122 } else if(strcmp(leks
,"user") == 0) {
123 if (strlen(res
)>=sizeof(user
)) {
124 debuga(_("User ID too long in redirector log file %s\n"),wentp
);
129 } else if(strcmp(leks
,"url") == 0) {
131 * Don't worry about the url being truncated as we only keep the host name
134 safe_strcpy(full_url
,res
,sizeof(full_url
));
139 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
140 getword_atoll(&lday
,&gwarea
,' ')<0) {
141 debuga(_("Invalid date in file \"%s\"\n"),wentp
);
148 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
149 debuga(_("Invalid time in file \"%s\"\n"),wentp
);
153 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
154 debuga(_("Invalid redirected source in file \"%s\"\n"),wentp
);
158 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
159 debuga(_("Invalid redirected list in file \"%s\"\n"),wentp
);
163 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(full_url
,sizeof(full_url
),&gwarea
,' ')<0) {
164 debuga(_("Invalid url in file \"%s\"\n"),wentp
);
168 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
169 debuga(_("Invalid source IP in file \"%s\"\n"),wentp
);
173 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
174 debuga(_("Invalid user in file \"%s\"\n"),wentp
);
179 url
=process_url(full_url
,false);
181 //sprintf(warea,"%04d%02d%02d",year,mon,day);
183 if(RedirectorFilterOutDate
) {
184 idata
= year
*10000+mon
*100+day
;
185 if(idata
< dfrom
|| idata
> duntil
)
194 if (user
[0]=='\0' || (user
[1]=='\0' && (user
[0]=='-' || user
[0]==' '))) {
195 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
199 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
201 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
202 strcpy(user
,"everybody");
205 uinfo
=userinfo_find_from_id(user
);
207 uinfo
=userinfo_create(user
,(id_is_ip
) ? NULL
: ip
);
208 uinfo
->no_report
=true;
209 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
210 user_find(userlabel
,MAX_USER_LEN
, user
);
211 userinfo_label(uinfo
,userlabel
);
213 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
214 if (source
[0] && list
[0])
215 fprintf(fp_ou
,"%s/%s\n",source
,list
);
217 fprintf(fp_ou
,"%s\n",source
);
219 fprintf(fp_ou
,"%s\n",list
);
//! Read one redirector log file and give each of its lines to parse_log().
//!
//! The file name is first searched in files_done and the function returns at once when it is
//! found there. Otherwise a copy of the name made with strdup() is stored in files_done, the
//! file is opened for reading and read line by line with longline_read().
//!
//! \param wentp The name of the redirector log file to read.
//! \param fp_ou The file receiving the parsed entries. It is passed unchanged to parse_log().
//! \param dfrom The first day of the reporting period, encoded by the caller as year*10000+month*100+day.
//! \param duntil The last day of the reporting period, with the same encoding as dfrom.
223 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
231 debuga(_("Reading redirector log file %s\n"),wentp
);
234 /* With squidGuard, you can log groups in only one log file.
235 We must parse each log files only one time. Example :
237 domainlist porn/domains
242 domainlist aggressive/domains
243 urllist aggressive/urls
247 domainlist audio-video/domains
248 urllist audio-video/urls
252 for (i
=0; i
<nfiles_done
; i
++)
253 if (!strcmp(wentp
, files_done
[i
])) return;
// NOTE(review): the result of realloc is assigned straight back to files_done. If it returns NULL the
// previous array is lost, which is harmless only if the error path ends the program (to be confirmed).
256 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
258 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
261 files_done
[nfiles_done
-1] = strdup(wentp
);
262 if (!files_done
[nfiles_done
-1]) {
263 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
267 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
268 debuga(_("Cannot open file \"%s\": %s\n"),wentp
,strerror(errno
));
272 if ((line
=longline_create())==NULL
) {
273 debuga(_("Not enough memory to read the redirector log\n"));
277 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
278 parse_log(fp_ou
,buf
);
280 if (fclose(fp_in
)==EOF
) {
281 debuga(_("Read error in \"%s\": %s\n"),wentp
,strerror(errno
));
284 longline_destroy(&line
);
//! Gather the entries of the redirector logs into a temporary file and sort that file.
//!
//! The entries are written by read_log() into redirector.int_unsort inside the tmp directory.
//! The logs read are the RedirectorLogs entries when NRedirectorLogs is positive. The
//! SquidGuardConf file is also scanned for logdir and log directives and every log file named
//! there is read (presumably only when no explicit log is configured, to be confirmed).
//!
//! When redirector_count is not zero, the unsorted file is sorted into redirector.int_log
//! (stored in redirector_sorted) on the first, second and fourth tab separated columns. The
//! sort is run through system() with both file names inserted between double quotes in the
//! command line. The unsorted file is deleted unless KeepTempLog is set.
289 void redirector_log(void)
291 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
293 char guard_in
[MAXLEN
];
// There is nothing to read without a squidGuard configuration file and without explicit redirector logs.
306 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0) {
307 if (debugz
>=LogLevel_Process
) debugaz(_("No redirector logs provided to produce that kind of report\n"));
311 snprintf(guard_in
,sizeof(guard_in
),"%s/redirector.int_unsort",tmp
);
312 if((fp_ou
=fopen(guard_in
,"w"))==NULL
) {
313 debuga(_("Cannot open file \"%s\": %s\n"),guard_in
,strerror(errno
));
// Encode both ends of the reporting period as year*10000+month*100+day.
317 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
318 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
320 if (NRedirectorLogs
>0) {
321 for (i
=0 ; i
<NRedirectorLogs
; i
++)
322 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
324 if(access(SquidGuardConf
, R_OK
) != 0) {
325 debuga(_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
329 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
330 debuga(_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
335 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
337 if((str
=get_param_value("logdir",buf
))!=NULL
) {
339 We want to tolerate spaces inside the directory name but we must also
340 remove the trailing spaces left by the editor after the directory name.
341 This should not be a problem as nobody use a file name with trailing spaces.
// NOTE(review): y is tested with y>=0 so it is presumably signed. The comparison with sizeof() below is
// then made as unsigned and a value of -1 (directory made only of spaces) would be clamped to
// sizeof(logdir)-2 instead of being left alone. Confirm the type of y.
343 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
344 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
350 } else if((str
=get_param_value("log",buf
))!=NULL
) {
351 if((str2
=get_param_value("anonymous",str
))!=NULL
)
355 If logdir is defined, we prepend it to the log file name, otherwise, we assume
356 the log directive provides an absolute file name to the log file. Therefore,
357 we don't need to add an additionnal / at the beginning of the log file name.
359 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
361 Spaces are allowed in the name of the log file. The file name ends at the first #
362 because it is assumed it is an end of line comment. Any space before the # is then
363 removed. Any control character (i.e. a character with a code lower than 32) ends
364 the file name. That includes the terminating zero.
366 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
370 while(*str
==' ' && y
>0) {
376 read_log(wentp
,fp_ou
,dfrom
,duntil
);
379 if (fclose(fp_guard
)==EOF
) {
380 debuga(_("Read error in \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
385 if (fp_ou
&& fclose(fp_ou
)==EOF
) {
386 debuga(_("Write error in \"%s\": %s\n"),guard_in
,strerror(errno
));
391 for (y
=0; y
<nfiles_done
; y
++)
392 if (files_done
[y
]) free(files_done
[y
]);
396 if (redirector_count
) {
397 snprintf(redirector_sorted
,sizeof(redirector_sorted
),"%s/redirector.int_log",tmp
);
399 debuga(_("Sorting file: %s\n"),redirector_sorted
);
402 if (snprintf(tmp6
,sizeof(tmp6
),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, redirector_sorted
)>=sizeof(tmp6
)) {
403 debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in
,redirector_sorted
);
406 cstatus
=system(tmp6
);
407 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
408 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
409 debuga(_("sort command: %s\n"),tmp6
);
414 if (!KeepTempLog
&& unlink(guard_in
)) {
415 debuga(_("Cannot delete \"%s\": %s\n"),guard_in
,strerror(errno
));
//! Write a table row telling how many redirector entries are not displayed.
//!
//! The message is selected with ngettext() according to count and is written in the fourth
//! of the five cells of the row. The other cells are left empty.
//!
//! \param fp_ou The HTML file receiving the row.
//! \param count The number of entries left out of the report.
421 static void show_ignored_redirector(FILE *fp_ou
,int count
)
// The message is formatted first because its text depends on the plural form of count.
425 snprintf(ignored
,sizeof(ignored
),ngettext("%d more redirector entry not shown here…","%d more redirector entries not shown here…",count
),count
);
426 fprintf(fp_ou
,"<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">%s</td><td class=\"data\"></td></tr>\n",ignored
);
//! Produce the HTML redirector report from the sorted file named by redirector_sorted.
//!
//! The report is written to redirector.html in outdirname. It contains one table row per
//! entry with the user label, the ip, the date and time, the accessed site and the rule.
//! When SquidGuardReportLimit is positive, the entries in excess of that limit are replaced
//! by one row written by show_ignored_redirector(). A warning is added to the report when
//! RedirectorErrors is positive. The sorted file is deleted unless KeepTempLog is set.
429 void redirector_report(void)
431 FILE *fp_in
= NULL
, *fp_ou
= NULL
;
449 struct getwordstruct gwarea
;
450 const struct userinfostruct
*uinfo
;
// Without any redirector entry, only a debug message may be produced.
457 if(!redirector_count
) {
458 if (debugz
>=LogLevel_Process
) {
459 if (redirector_sorted
[0])
460 debugaz(_("Redirector report not generated because it is empty\n"));
465 snprintf(report
,sizeof(report
),"%s/redirector.html",outdirname
);
467 if((fp_in
=fopen(redirector_sorted
,"r"))==NULL
) {
468 debuga(_("Cannot open file \"%s\": %s\n"),redirector_sorted
,strerror(errno
));
472 if((fp_ou
=fopen(report
,"w"))==NULL
) {
473 debuga(_("Cannot open file \"%s\": %s\n"),report
,strerror(errno
));
477 if ((line
=longline_create())==NULL
) {
478 debuga(_("Not enough memory to read the processed redirector log\n"));
// Page header: title, reporting period and column headings of the table.
482 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Redirector report"),HTML_JS_NONE
);
483 fputs("<tr><td class=\"header_c\">",fp_ou
);
484 fprintf(fp_ou
,_("Period: %s"),period
.html
);
485 fputs("</td></tr>\n",fp_ou
);
486 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
487 close_html_header(fp_ou
);
489 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou
);
490 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
// Each line holds the user, the date, the time, the ip and the url separated by tabs, then the rule up to the end of the line.
492 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
493 getword_start(&gwarea
,buf
);
494 if (getword(user
,sizeof(user
),&gwarea
,'\t')<0) {
495 debuga(_("Invalid user in file \"%s\"\n"),redirector_sorted
);
498 if (getword_atoll(&data2
,&gwarea
,'\t')<0) {
499 debuga(_("Invalid date in file \"%s\"\n"),redirector_sorted
);
502 if (getword(hora
,sizeof(hora
),&gwarea
,'\t')<0) {
503 debuga(_("Invalid time in file \"%s\"\n"),redirector_sorted
);
506 if (getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
507 debuga(_("Invalid IP address in file \"%s\"\n"),redirector_sorted
);
510 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
511 debuga(_("Invalid url in file \"%s\"\n"),redirector_sorted
);
514 if (getword(rule
,sizeof(rule
),&gwarea
,'\n')<0) {
515 debuga(_("Invalid rule in file \"%s\"\n"),redirector_sorted
);
519 uinfo
=userinfo_find_from_id(user
);
521 debuga(_("Unknown user ID %s in file \"%s\"\n"),user
,redirector_sorted
);
// NOTE(review): the year and the day are taken from data2 as from a yyyymmdd number, but the month is
// extracted with %10 which keeps a single digit. October to December would then become 0 to 2.
// It looks like %100 was intended. Confirm against computedate().
525 computedate(data2
/10000,(data2
/100)%10,data2
%100,&t
);
526 strftime(data
,sizeof(data
),"%x",&t
);
533 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
537 if(strcmp(ouser
,user
) != 0) {
541 if(strcmp(oip
,ip
) != 0) {
544 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
// With a report limit, entries are compared per user label (ouser2) and the surplus of a user is reported by show_ignored_redirector().
549 if(SquidGuardReportLimit
) {
550 if(strcmp(ouser2
,uinfo
->label
) == 0) {
553 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
554 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
556 strcpy(ouser2
,uinfo
->label
);
558 if(count
> SquidGuardReportLimit
)
// The first two cells contain either the user label and the ip or nothing.
563 fprintf(fp_ou
,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo
->label
,ip
);
565 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou
);
566 fprintf(fp_ou
,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data
,hora
);
567 output_html_link(fp_ou
,url
,100);
568 fprintf(fp_ou
,"</td><td class=\"data2\">%s</td></tr>\n",rule
);
570 if (fclose(fp_in
)==EOF
) {
571 debuga(_("Read error in \"%s\": %s\n"),redirector_sorted
,strerror(errno
));
574 longline_destroy(&line
);
// The surplus of the last user is reported here as no other user follows to trigger it inside the loop.
576 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
577 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
579 fputs("</table>\n",fp_ou
);
// Tell the reader that some lines of the logs were rejected.
581 if (RedirectorErrors
>0)
583 fputs("<div class=\"warn\"><span>",fp_ou
);
584 fprintf(fp_ou
,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors
),RedirectorErrors
);
585 fputs("</span></div>\n",fp_ou
);
588 fputs("</div>\n",fp_ou
);
589 write_html_trailer(fp_ou
);
590 if (fclose(fp_ou
)==EOF
) {
591 debuga(_("Write error in \"%s\": %s\n"),report
,strerror(errno
));
// The sorted file is not needed any more once the report is written.
595 if (!KeepTempLog
&& unlink(redirector_sorted
)) {
596 debuga(_("Cannot delete \"%s\": %s\n"),redirector_sorted
,strerror(errno
));