2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
30 static char **files_done
= NULL
;
31 static int nfiles_done
= 0;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
=0;
35 //! Name of the sorted report.
36 static char redirector_sorted
[MAXLEN
]="";
38 static void parse_log(FILE *fp_ou
,char *buf
)
40 char leks
[5], sep
[2], res
[MAXLEN
];
42 char source
[128], list
[128];
43 char full_url
[MAX_URL_LEN
];
45 char user
[MAX_USER_LEN
];
47 long long int lmon
, lday
, lyear
;
51 struct getwordstruct gwarea
;
52 struct getwordstruct gwarea1
;
53 struct userinfostruct
*uinfo
;
55 getword_start(&gwarea
,buf
);
56 if(RedirectorLogFormat
[0] != '\0') {
57 getword_start(&gwarea1
,RedirectorLogFormat
);
59 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
60 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
72 while(strcmp(leks
,"end") != 0) {
73 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
74 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
77 if (getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
78 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
81 if(strcmp(leks
,"end") != 0) {
82 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
83 debuga(_("Parsing of tag \"%s\" in redirector log \"%s\" returned no result\n"),leks
,wentp
);
87 if(strcmp(leks
,"year") == 0) {
89 } else if(strcmp(leks
,"mon") == 0) {
91 } else if(strcmp(leks
,"day") == 0) {
93 } else if(strcmp(leks
,"hour") == 0) {
94 if (strlen(res
)>=sizeof(hour
)) {
95 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
100 } else if(strcmp(leks
,"source") == 0) {
101 if (strlen(res
)>=sizeof(source
)) {
102 debuga(_("Banning source name too long in redirector log file %s\n"),wentp
);
107 } else if(strcmp(leks
,"list") == 0) {
108 if (strlen(res
)>=sizeof(list
)) {
109 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
114 } else if(strcmp(leks
,"ip") == 0) {
115 if (strlen(res
)>=sizeof(ip
)) {
116 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
121 } else if(strcmp(leks
,"user") == 0) {
122 if (strlen(res
)>=sizeof(user
)) {
123 debuga(_("User ID too long in redirector log file \"%s\"\n"),wentp
);
128 } else if(strcmp(leks
,"url") == 0) {
130 * Don't worry about the url being truncated as we only keep the host name
133 safe_strcpy(full_url
,res
,sizeof(full_url
));
138 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
139 getword_atoll(&lday
,&gwarea
,' ')<0) {
140 debuga(_("Invalid date in file \"%s\"\n"),wentp
);
147 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
148 debuga(_("Invalid time in file \"%s\"\n"),wentp
);
152 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
153 debuga(_("Invalid redirected source in file %s\n"),wentp
);
157 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
158 debuga(_("Invalid redirected list in file \"%s\"\n"),wentp
);
162 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(full_url
,sizeof(full_url
),&gwarea
,' ')<0) {
163 debuga(_("Invalid url in file \"%s\"\n"),wentp
);
167 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
168 debuga(_("Invalid source IP in file \"%s\"\n"),wentp
);
172 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
173 debuga(_("Invalid user in file \"%s\"\n"),wentp
);
178 url
=process_url(full_url
,false);
180 //sprintf(warea,"%04d%02d%02d",year,mon,day);
182 if(RedirectorFilterOutDate
) {
183 idata
= year
*10000+mon
*100+day
;
184 if(idata
< dfrom
|| idata
> duntil
)
193 if(strcmp(user
,"-") == 0 || strcmp(user
," ") == 0 || strcmp(user
,"") == 0) {
194 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
198 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
200 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
201 strcpy(user
,"everybody");
204 uinfo
=userinfo_find_from_id(user
);
206 uinfo
=userinfo_create(user
,ip
);
207 uinfo
->id_is_ip
=id_is_ip
;
208 uinfo
->no_report
=true;
209 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
210 user_find(uinfo
->label
,MAX_USER_LEN
, user
);
212 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
213 if (source
[0] && list
[0])
214 fprintf(fp_ou
,"%s/%s\n",source
,list
);
216 fprintf(fp_ou
,"%s\n",source
);
218 fprintf(fp_ou
,"%s\n",list
);
222 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
230 debuga(_("Reading redirector log file \"%s\"\n"),wentp
);
233 /* With squidGuard, you can log groups in only one log file.
234 We must parse each log file only once. Example:
236 domainlist porn/domains
241 domainlist aggressive/domains
242 urllist aggressive/urls
246 domainlist audio-video/domains
247 urllist audio-video/urls
251 for (i
=0; i
<nfiles_done
; i
++)
252 if (!strcmp(wentp
, files_done
[i
])) return;
255 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
257 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
260 files_done
[nfiles_done
-1] = strdup(wentp
);
261 if (!files_done
[nfiles_done
-1]) {
262 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
266 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
267 debugapos("squidguard",_("Cannot open file \"%s\": %s\n"),wentp
,strerror(errno
));
271 if ((line
=longline_create())==NULL
) {
272 debuga(_("Not enough memory to read file \"%s\"\n"),wentp
);
276 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
277 parse_log(fp_ou
,buf
);
280 longline_destroy(&line
);
285 void redirector_log(void)
287 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
289 char guard_in
[MAXLEN
];
302 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0) {
303 if (debugz
) debugaz(_("No redirector logs provided to produce that kind of report\n"));
307 snprintf(guard_in
,sizeof(guard_in
),"%s/redirector.int_unsort",tmp
);
308 snprintf(redirector_sorted
,sizeof(redirector_sorted
),"%s/redirector.int_log",tmp
);
309 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
310 debugapos("squidguard",_("Cannot open file \"%s\": %s\n"),guard_in
,strerror(errno
));
314 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
315 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
317 if (NRedirectorLogs
>0) {
318 for (i
=0 ; i
<NRedirectorLogs
; i
++)
319 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
321 if(access(SquidGuardConf
, R_OK
) != 0) {
322 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
326 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
327 debugapos("squidguard",_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
332 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
334 if((str
=get_param_value("logdir",buf
))!=NULL
) {
336 We want to tolerate spaces inside the directory name but we must also
337 remove the trailing spaces left by the editor after the directory name.
338 This should not be a problem as nobody uses a file name with trailing spaces.
340 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
341 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
347 } else if((str
=get_param_value("log",buf
))!=NULL
) {
348 if((str2
=get_param_value("anonymous",str
))!=NULL
)
352 If logdir is defined, we prepend it to the log file name, otherwise, we assume
353 the log directive provides an absolute file name to the log file. Therefore,
354 we don't need to add an additional / at the beginning of the log file name.
356 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
358 Spaces are allowed in the name of the log file. The file name ends at the first #
359 because it is assumed it is an end of line comment. Any space before the # is then
360 removed. Any control character (i.e. a character with a code lower than 32) ends
361 the file name. That includes the terminating zero.
363 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
367 while(*str
==' ' && y
>0) {
373 read_log(wentp
,fp_ou
,dfrom
,duntil
);
378 if (fp_guard
) fclose(fp_guard
);
379 if (fp_ou
) fclose(fp_ou
);
382 for (y
=0; y
<nfiles_done
; y
++)
383 if (files_done
[y
]) free(files_done
[y
]);
388 debuga(_("Sorting file \"%s\"\n"),redirector_sorted
);
391 if (snprintf(tmp6
,sizeof(tmp6
),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, redirector_sorted
)>=sizeof(tmp6
)) {
392 debuga(_("Command too long: "));
393 debuga_more("sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, redirector_sorted
);
396 cstatus
=system(tmp6
);
397 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
398 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
399 debuga(_("sort command: %s\n"),tmp6
);
403 if (!KeepTempLog
&& unlink(guard_in
)) {
404 debuga(_("Cannot delete \"%s\": %s\n"),guard_in
,strerror(errno
));
410 static void show_ignored_redirector(FILE *fp_ou
,int count
)
414 snprintf(ignored
,sizeof(ignored
),ngettext("%d more redirector entry not shown here…","%d more redirector entries not shown here…",count
),count
);
415 fprintf(fp_ou
,"<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">%s</td><td class=\"data\"></td></tr>\n",ignored
);
418 void redirector_report(void)
420 FILE *fp_in
= NULL
, *fp_ou
= NULL
;
438 struct getwordstruct gwarea
;
439 const struct userinfostruct
*uinfo
;
446 if(!redirector_count
) {
447 if (!KeepTempLog
&& redirector_sorted
[0]!='\0' && unlink(redirector_sorted
))
448 debuga(_("Cannot delete \"%s\": %s\n"),redirector_sorted
,strerror(errno
));
449 if (debugz
) debugaz(_("Redirector report not generated because it is empty\n"));
453 snprintf(report
,sizeof(report
),"%s/redirector.html",outdirname
);
455 if((fp_in
=fopen(redirector_sorted
,"r"))==NULL
) {
456 debugapos("squidguard",_("Cannot open file \"%s\": %s\n"),redirector_sorted
,strerror(errno
));
460 if((fp_ou
=fopen(report
,"w"))==NULL
) {
461 debugapos("squidguard",_("Cannot open file \"%s\": %s\n"),report
,strerror(errno
));
465 if ((line
=longline_create())==NULL
) {
466 debuga(_("Not enough memory to read file \"%s\"\n"),redirector_sorted
);
470 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Redirector report"),HTML_JS_NONE
);
471 fputs("<tr><td class=\"header_c\">",fp_ou
);
472 fprintf(fp_ou
,_("Period: %s"),period
.html
);
473 fputs("</td></tr>\n",fp_ou
);
474 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
475 close_html_header(fp_ou
);
477 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou
);
478 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
480 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
481 getword_start(&gwarea
,buf
);
482 if (getword(user
,sizeof(user
),&gwarea
,'\t')<0) {
483 debuga(_("Invalid user found in file \"%s\"\n"),redirector_sorted
);
486 if (getword_atoll(&data2
,&gwarea
,'\t')<0) {
487 debuga(_("Invalid date in file \"%s\"\n"),redirector_sorted
);
490 if (getword(hora
,sizeof(hora
),&gwarea
,'\t')<0) {
491 debuga(_("Invalid time in file \"%s\"\n"),redirector_sorted
);
494 if (getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
495 debuga(_("Invalid IP address in file \"%s\"\n"),redirector_sorted
);
498 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
499 debuga(_("Invalid url in file \"%s\"\n"),redirector_sorted
);
502 if (getword(rule
,sizeof(rule
),&gwarea
,'\n')<0) {
503 debuga(_("Invalid rule in file \"%s\"\n"),redirector_sorted
);
507 uinfo
=userinfo_find_from_id(user
);
509 debuga(_("Unknown user ID %s in file \"%s\"\n"),user
,redirector_sorted
);
513 computedate(data2
/10000,(data2
/100)%10,data2
%100,&t
);
514 strftime(data
,sizeof(data
),"%x",&t
);
521 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
525 if(strcmp(ouser
,user
) != 0) {
529 if(strcmp(oip
,ip
) != 0) {
532 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
537 if(SquidGuardReportLimit
) {
538 if(strcmp(ouser2
,uinfo
->label
) == 0) {
541 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
542 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
544 strcpy(ouser2
,uinfo
->label
);
546 if(count
> SquidGuardReportLimit
)
551 fprintf(fp_ou
,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo
->label
,ip
);
553 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou
);
554 fprintf(fp_ou
,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data
,hora
);
555 output_html_link(fp_ou
,url
,100);
556 fprintf(fp_ou
,"</td><td class=\"data2\">%s</td></tr>\n",rule
);
559 longline_destroy(&line
);
561 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
562 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
564 fputs("</table>\n",fp_ou
);
566 if (RedirectorErrors
>0)
568 fputs("<div class=\"warn\"><span>",fp_ou
);
569 fprintf(fp_ou
,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors
),RedirectorErrors
);
570 fputs("</span></div>\n",fp_ou
);
573 fputs("</div>\n",fp_ou
);
574 if (write_html_trailer(fp_ou
)<0)
575 debuga(_("Write error in file \"%s\"\n"),report
);
576 if (fclose(fp_ou
)==EOF
)
577 debuga(_("Failed to close file \"%s\": %s\n"),report
,strerror(errno
));
579 if (!KeepTempLog
&& unlink(redirector_sorted
)) {
580 debuga(_("Cannot delete \"%s\": %s\n"),redirector_sorted
,strerror(errno
));