2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
//! The names of the redirector log files already read. read_log() looks a name up
//! in this list before reading a file and stores a copy of each new name in it.
30 static char **files_done
= NULL
;
//! The number of entries in the files_done list.
31 static int nfiles_done
= 0;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
=0;
// Parse one line of a redirector log and append the extracted record to fp_ou.
//
// Two layouts are handled in the visible code:
//  - when RedirectorLogFormat is not empty, the line is split according to the tags
//    found in that format (year, mon, day, hour, source, list, ip, user, url), each
//    tag being followed by its one character column separator, until the "end" tag;
//  - a fixed layout made of a year-mon-day date, a time, a "(source/list/" group,
//    the URL, the client IP terminated by '/' and the user. Presumably this is the
//    branch taken when no format is configured (the else line is not visible here).
//
// The record written to fp_ou is tab separated: user ID, date as yyyymmdd, time,
// IP address, URL and the rule that matched.
//
// fp_ou: the intermediate file receiving the record.
// buf:   the log line to parse.
//
// NOTE(review): wentp, dfrom and duntil are used below but are not parameters of
// this function, so they must be variables visible at file or program scope.
// Confirm they hold the file name and period that read_log() received.
36 static void parse_log(FILE *fp_ou
,char *buf
)
38 char leks
[5], sep
[2], res
[MAXLEN
];
40 char source
[128], list
[128];
41 char full_url
[MAX_URL_LEN
];
43 char user
[MAX_USER_LEN
];
45 long long int lmon
, lday
, lyear
;
49 struct getwordstruct gwarea
;
50 struct getwordstruct gwarea1
;
51 struct userinfostruct
*uinfo
;
// gwarea walks the log line while gwarea1 walks the user supplied format.
53 getword_start(&gwarea
,buf
);
54 if(RedirectorLogFormat
[0] != '\0') {
55 getword_start(&gwarea1
,RedirectorLogFormat
);
// Whatever precedes the first '#' of the format is read into leks.
57 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
58 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
// Each iteration reads one tag name and its separator from the format.
70 while(strcmp(leks
,"end") != 0) {
71 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
72 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
75 if (getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
76 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
79 if(strcmp(leks
,"end") != 0) {
// The column value runs up to the first character of the separator.
80 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
81 debuga(_("Parsing of tag \"%s\" in redirector log %s returned no result\n"),leks
,wentp
);
// Dispatch on the tag name. The length of res is checked against the size of the
// destination buffer before the value is stored.
85 if(strcmp(leks
,"year") == 0) {
87 } else if(strcmp(leks
,"mon") == 0) {
89 } else if(strcmp(leks
,"day") == 0) {
91 } else if(strcmp(leks
,"hour") == 0) {
92 if (strlen(res
)>=sizeof(hour
)) {
93 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
98 } else if(strcmp(leks
,"source") == 0) {
99 if (strlen(res
)>=sizeof(source
)) {
100 debuga(_("Banning source name too long in redirector log file %s\n"),wentp
);
105 } else if(strcmp(leks
,"list") == 0) {
106 if (strlen(res
)>=sizeof(list
)) {
107 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
112 } else if(strcmp(leks
,"ip") == 0) {
113 if (strlen(res
)>=sizeof(ip
)) {
114 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
119 } else if(strcmp(leks
,"user") == 0) {
120 if (strlen(res
)>=sizeof(user
)) {
121 debuga(_("User ID too long in redirector log file %s\n"),wentp
);
126 } else if(strcmp(leks
,"url") == 0) {
128 * Don't worry about the url being truncated as we only keep the host name
131 safe_strcpy(full_url
,res
,sizeof(full_url
));
// Fixed layout: the date is three numbers separated by '-' and ended by a space.
136 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
137 getword_atoll(&lday
,&gwarea
,' ')<0) {
138 debuga(_("Invalid date found in file %s\n"),wentp
);
145 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
146 debuga(_("Invalid time found in file %s\n"),wentp
);
// Skip everything up to the opening parenthesis then read the source up to '/'.
150 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
151 debuga(_("Invalid redirected source in file %s\n"),wentp
);
155 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
156 debuga(_("Invalid redirected list in file %s\n"),wentp
);
// The rest of the current column is dropped; the URL is the next space delimited word.
160 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(full_url
,sizeof(full_url
),&gwarea
,' ')<0) {
161 debuga(_("Invalid URL in file %s\n"),wentp
);
165 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
166 debuga(_("Invalid source IP in file %s\n"),wentp
);
170 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
171 debuga(_("Invalid user in file %s\n"),wentp
);
// Only the value returned by process_url() is written to the output record.
176 url
=process_url(full_url
,false);
178 //sprintf(warea,"%04d%02d%02d",year,mon,day);
// When date filtering is enabled, the entry date is turned into a yyyymmdd
// integer and compared with the dfrom..duntil range.
180 if(RedirectorFilterOutDate
) {
181 idata
= year
*10000+mon
*100+day
;
182 if(idata
< dfrom
|| idata
> duntil
)
// A user of "-", " " or "" counts as missing. RecordsWithoutUser then selects
// between the IP, ignore and "everybody" policies.
191 if(strcmp(user
,"-") == 0 || strcmp(user
," ") == 0 || strcmp(user
,"") == 0) {
192 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
196 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
198 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
// NOTE(review): unbounded strcpy; fine as long as MAX_USER_LEN is at least 10.
199 strcpy(user
,"everybody");
// Look the user up. The lines that follow create and initialise a new entry,
// presumably only when the lookup found nothing (the test is not visible here).
202 uinfo
=userinfo_find_from_id(user
);
204 uinfo
=userinfo_create(user
);
205 uinfo
->id_is_ip
=id_is_ip
;
206 uinfo
->no_report
=true;
207 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
208 user_find(uinfo
->label
,MAX_USER_LEN
, user
);
// Write the tab separated record. The last column is completed below.
210 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
// The rule is "source/list" when both are known. The two other fprintf write
// only one of them; the conditions selecting them are not visible here.
211 if (source
[0] && list
[0])
212 fprintf(fp_ou
,"%s/%s\n",source
,list
);
214 fprintf(fp_ou
,"%s\n",source
);
216 fprintf(fp_ou
,"%s\n",list
);
// Read one redirector log file and pass each of its lines to parse_log().
//
// A file whose name is already stored in files_done is not read again. The name
// of a new file is duplicated and appended to that list before the file is read.
//
// wentp:  the name of the log file to read.
// fp_ou:  the intermediate file handed over to parse_log().
// dfrom:  first date of the period.
// duntil: last date of the period.
//
// NOTE(review): dfrom and duntil are not used in the visible lines of this
// function, and parse_log() is not given them as arguments. Confirm that the
// dfrom/duntil it compares against carry the same values.
220 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
228 debuga(_("Reading redirector log file %s\n"),wentp
);
231 /* With squidGuard, you can log groups in only one log file.
232 We must parse each log files only one time. Example :
234 domainlist porn/domains
239 domainlist aggressive/domains
240 urllist aggressive/urls
244 domainlist audio-video/domains
245 urllist audio-video/urls
// Nothing to do if the file was already processed.
249 for (i
=0; i
<nfiles_done
; i
++)
250 if (!strcmp(wentp
, files_done
[i
])) return;
// NOTE(review): the realloc() result is stored straight into files_done. If it
// returns NULL, the previous array is no longer reachable. That is harmless only
// if the error path, which is not visible here, terminates the program.
253 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
255 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
// The new name goes in the last slot of the list.
258 files_done
[nfiles_done
-1] = strdup(wentp
);
259 if (!files_done
[nfiles_done
-1]) {
260 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
264 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
265 debuga(_("(squidguard) Cannot open log file %s\n"),wentp
);
// longline_read() returns the lines one at a time until it returns NULL.
269 if ((line
=longline_create())==NULL
) {
270 debuga(_("Not enough memory to read the redirector log\n"));
274 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
275 parse_log(fp_ou
,buf
);
278 longline_destroy(&line
);
// Gather the redirector log entries into a sorted intermediate file.
//
// The entries are appended by read_log() to redirector.int_unsort in the tmp
// directory. The log files are either those listed in RedirectorLogs or those
// named by the "log" directives of the squidGuard configuration file
// SquidGuardConf, with the "logdir" directive of that file prepended when it
// is defined. The unsorted file is then sorted with the external sort command
// into redirector.int_log and deleted.
//
// Nothing is done when neither SquidGuardConf nor RedirectorLogs is set.
283 void redirector_log(void)
285 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
287 char guard_in
[MAXLEN
];
288 char guard_ou
[MAXLEN
];
301 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0) {
302 if (debugz
) debugaz(_("No redirector logs provided to produce that kind of report\n"));
// guard_in is the unsorted file being filled, guard_ou the sorted result.
306 snprintf(guard_in
,sizeof(guard_in
),"%s/redirector.int_unsort",tmp
);
307 snprintf(guard_ou
,sizeof(guard_ou
),"%s/redirector.int_log",tmp
);
308 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
309 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in
);
// The period limits are converted to yyyymmdd integers.
313 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
314 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
// Explicitly configured log files are read first. The squidGuard configuration
// file is presumably only parsed when there are none (the else is not visible).
316 if (NRedirectorLogs
>0) {
317 for (i
=0 ; i
<NRedirectorLogs
; i
++)
318 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
320 if(access(SquidGuardConf
, R_OK
) != 0) {
321 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
325 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
326 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf
);
331 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
333 if((str
=get_param_value("logdir",buf
))!=NULL
) {
335 We want to tolerate spaces inside the directory name but we must also
336 remove the trailing spaces left by the editor after the directory name.
337 This should not be a problem as nobody use a file name with trailing spaces.
// y ends on the index of the last character above the space (code 32), or -1.
// NOTE(review): if y is a signed int, the comparison with sizeof() below is done
// unsigned, so a value of -1 (string made only of blanks) is clamped to
// sizeof(logdir)-2 instead of being kept. Confirm the type of y.
339 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
340 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
346 } else if((str
=get_param_value("log",buf
))!=NULL
) {
347 if((str2
=get_param_value("anonymous",str
))!=NULL
)
351 If logdir is defined, we prepend it to the log file name, otherwise, we assume
352 the log directive provides an absolute file name to the log file. Therefore,
353 we don't need to add an additionnal / at the beginning of the log file name.
// NOTE(review): sprintf() is unbounded here. It is safe only if wentp is at
// least two bytes bigger than logdir.
355 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
357 Spaces are allowed in the name of the log file. The file name ends at the first #
358 because it is assumed it is an end of line comment. Any space before the # is then
359 removed. Any control character (i.e. a character with a code lower than 32) ends
360 the file name. That includes the terminating zero.
362 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
366 while(*str
==' ' && y
>0) {
372 read_log(wentp
,fp_ou
,dfrom
,duntil
);
377 if (fp_guard
) fclose(fp_guard
);
378 if (fp_ou
) fclose(fp_ou
);
// The names stored by read_log() are released.
381 for (y
=0; y
<nfiles_done
; y
++)
382 if (files_done
[y
]) free(files_done
[y
]);
387 debuga(_("Sorting file: %s\n"),guard_ou
);
// Sort on the user ID, date and IP address columns. The columns are separated
// by a tab.
// NOTE(review): the file names are placed between double quotes in a shell
// command; a name containing a double quote, a dollar or a backquote would not
// be protected.
390 if (snprintf(tmp6
,sizeof(tmp6
),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, guard_ou
)>=sizeof(tmp6
)) {
391 debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in
,guard_ou
);
394 cstatus
=system(tmp6
);
// NOTE(review): WEXITSTATUS() is reported even when WIFEXITED() is false, in
// which case its value is not meaningful.
395 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
396 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
397 debuga(_("sort command: %s\n"),tmp6
);
// The unsorted file is no longer needed.
401 if (unlink(guard_in
)) {
402 debuga(_("Cannot delete %s - %s\n"),guard_in
,strerror(errno
));
//! Write the table row telling how many redirector entries were left out of the report.
//!
//! The message is formatted directly into the output stream, so a long translation
//! cannot be cut short by a fixed-size intermediate buffer.
//!
//! \param fp_ou The HTML report file.
//! \param count The number of entries that are not shown.
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	// ngettext selects the singular or plural form of the message according to count.
	fprintf(fp_ou,ngettext("%d more redirector entry not shown here…","%d more redirector entries not shown here…",count),count);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
// Produce the HTML redirector report (redirector.html in outdirname) from the
// sorted intermediate file redirector.int_log found in the tmp directory.
//
// Each input line holds, separated by tabs, the user ID, the date as a yyyymmdd
// number, the time, the IP address, the URL and the rule. One table row is
// written per entry. When SquidGuardReportLimit is set, the entries of a user
// beyond that limit are skipped and summarised by show_ignored_redirector().
// The number of parsing errors counted in RedirectorErrors, if any, is reported
// after the table. The intermediate file is deleted at the end, or immediately
// when redirector_count is zero, in which case no report is produced.
416 void redirector_report(void)
418 FILE *fp_in
= NULL
, *fp_ou
= NULL
;
422 char squidguard_in
[MAXLEN
];
437 struct getwordstruct gwarea
;
438 const struct userinfostruct
*uinfo
;
445 snprintf(squidguard_in
,sizeof(squidguard_in
),"%s/redirector.int_log",tmp
);
// Nothing was logged: drop the intermediate file and skip the report.
446 if(!redirector_count
) {
447 unlink(squidguard_in
);
448 if (debugz
) debugaz(_("Redirector report not generated because it is empty\n"));
452 snprintf(report
,sizeof(report
),"%s/redirector.html",outdirname
);
454 if((fp_in
=fopen(squidguard_in
,"r"))==NULL
) {
455 debuga(_("(squidguard) Cannot open log file %s\n"),squidguard_in
);
459 if((fp_ou
=fopen(report
,"w"))==NULL
) {
460 debuga(_("(squidguard) Cannot open log file %s\n"),report
);
464 if ((line
=longline_create())==NULL
) {
465 debuga(_("Not enough memory to read the processed redirector log\n"));
// Page header with the period and the title of the report.
469 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Redirector report"),HTML_JS_NONE
);
470 fputs("<tr><td class=\"header_c\">",fp_ou
);
471 fprintf(fp_ou
,_("Period: %s"),period
.html
);
472 fputs("</td></tr>\n",fp_ou
);
473 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
474 close_html_header(fp_ou
);
// Table with five columns: user, IP or name, date and time, site and rule.
476 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou
);
477 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
// Split each line into its tab separated columns.
479 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
480 getword_start(&gwarea
,buf
);
481 if (getword(user
,sizeof(user
),&gwarea
,'\t')<0) {
482 debuga(_("Invalid user in file %s\n"),squidguard_in
);
485 if (getword_atoll(&data2
,&gwarea
,'\t')<0) {
486 debuga(_("Invalid date in file %s\n"),squidguard_in
);
489 if (getword(hora
,sizeof(hora
),&gwarea
,'\t')<0) {
490 debuga(_("Invalid time in file %s\n"),squidguard_in
);
493 if (getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
494 debuga(_("Invalid IP address in file %s\n"),squidguard_in
);
497 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
498 debuga(_("Invalid URL in file %s\n"),squidguard_in
);
501 if (getword(rule
,sizeof(rule
),&gwarea
,'\n')<0) {
502 debuga(_("Invalid rule in file %s\n"),squidguard_in
);
506 uinfo
=userinfo_find_from_id(user
);
508 debuga(_("Unknown user ID %s in file %s\n"),user
,squidguard_in
);
// The date is stored as yyyymmdd so the month is the two digits (data2/100)%100.
// A modulo 10 would keep only its last digit and turn October, November and
// December into months 0, 1 and 2.
512 computedate(data2
/10000,(data2
/100)%100,data2
%100,&t
);
// Format the date according to the locale.
513 strftime(data
,sizeof(data
),"%x",&t
);
520 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
// Detect a change of user or of IP address with respect to the previous entry.
524 if(strcmp(ouser
,user
) != 0) {
528 if(strcmp(oip
,ip
) != 0) {
531 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
// Limit the number of entries displayed for one user. When the user changes,
// the number of entries skipped for the previous user is reported.
536 if(SquidGuardReportLimit
) {
537 if(strcmp(ouser2
,uinfo
->label
) == 0) {
540 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
541 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
543 strcpy(ouser2
,uinfo
->label
);
545 if(count
> SquidGuardReportLimit
)
// The first two cells are either the user label and the IP address or empty.
// The condition choosing between the two forms is not visible here.
550 fprintf(fp_ou
,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo
->label
,ip
);
552 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou
);
553 fprintf(fp_ou
,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data
,hora
);
554 output_html_link(fp_ou
,url
,100);
555 fprintf(fp_ou
,"</td><td class=\"data2\">%s</td></tr>\n",rule
);
558 longline_destroy(&line
);
// Report the entries skipped for the last user of the file.
560 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
561 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
563 fputs("</table>\n",fp_ou
);
// Warn the reader if some log lines could not be parsed.
565 if (RedirectorErrors
>0)
567 fputs("<div class=\"warn\"><span>",fp_ou
);
568 fprintf(fp_ou
,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors
),RedirectorErrors
);
569 fputs("</span></div>\n",fp_ou
);
572 fputs("</div>\n",fp_ou
);
573 if (write_html_trailer(fp_ou
)<0)
574 debuga(_("Write error in file %s\n"),report
);
575 if (fclose(fp_ou
)==EOF
)
576 debuga(_("Failed to close file %s - %s\n"),report
,strerror(errno
));
// The intermediate file is not needed any more.
578 if (unlink(squidguard_in
)) {
579 debuga(_("Cannot delete %s - %s\n"),squidguard_in
,strerror(errno
));