2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
30 static char **files_done
= NULL
;
31 static int nfiles_done
= 0;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
=0;
35 //! The file containing the sorted entries.
36 static char redirector_sorted
[MAXLEN
]="";
38 extern char StripUserSuffix
[MAX_USER_LEN
];
39 extern int StripSuffixLen
;
//! Parse one line of a redirector log and write the resulting record to fp_ou.
//!
//! \param fp_ou Stream receiving one tab separated record per log line.
//! \param buf The log line to split. It is scanned through getword_start()/getword().
//! \param dfrom First date, as year*10000+mon*100+day, compared with the entry date.
//! \param duntil Last date, in the same encoding, compared with the entry date.
//!
//! Two parsers are visible below: one driven by the tags of RedirectorLogFormat
//! and one with a fixed sequence of fields (date, time, source/list, url, ip, user).
//! NOTE(review): several lines of this function are missing from this listing
//! (declarations of hour, ip, url, year, mon, day, idata, id_is_ip and x, as well
//! as the statements run after each error message). Confirm against the full file.
41 static void parse_log(FILE *fp_ou
,char *buf
,int dfrom
,int duntil
)
// leks receives a tag name (4 characters at most) and sep its one character separator.
43 char leks
[5], sep
[2], res
[MAXLEN
];
45 char source
[128], list
[128];
46 char full_url
[MAX_URL_LEN
];
48 char user
[MAX_USER_LEN
];
50 char userlabel
[MAX_USER_LEN
];
51 long long int lmon
, lday
, lyear
;
55 struct getwordstruct gwarea
;
56 struct getwordstruct gwarea1
;
57 struct userinfostruct
*uinfo
;
59 getword_start(&gwarea
,buf
);
// A non empty RedirectorLogFormat selects the tag driven parser.
60 if(RedirectorLogFormat
[0] != '\0') {
61 getword_start(&gwarea1
,RedirectorLogFormat
);
63 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
64 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
// Read the tags of the format one after the other until the "end" tag.
76 while(strcmp(leks
,"end") != 0) {
77 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
78 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
81 if (getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
82 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
85 if(strcmp(leks
,"end") != 0) {
// The value of the tag is the text of the log line up to the separator.
// NOTE(review): wentp is not a parameter of this function, so it must come from
// file or global scope. Confirm it names the log file currently being read.
86 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
87 debuga(__FILE__
,__LINE__
,_("Parsing of tag \"%s\" in redirector log \"%s\" returned no result\n"),leks
,wentp
);
// Dispatch on the tag name. String values are checked against the size of
// their destination before use.
91 if(strcmp(leks
,"year") == 0) {
93 } else if(strcmp(leks
,"mon") == 0) {
95 } else if(strcmp(leks
,"day") == 0) {
97 } else if(strcmp(leks
,"hour") == 0) {
98 if (strlen(res
)>=sizeof(hour
)) {
99 debuga(__FILE__
,__LINE__
,_("Hour string too long in redirector log file \"%s\"\n"),wentp
);
104 } else if(strcmp(leks
,"source") == 0) {
105 if (strlen(res
)>=sizeof(source
)) {
106 debuga(__FILE__
,__LINE__
,_("Banning source name too long in redirector log file \"%s\"\n"),wentp
);
111 } else if(strcmp(leks
,"list") == 0) {
112 if (strlen(res
)>=sizeof(list
)) {
113 debuga(__FILE__
,__LINE__
,_("Banning list name too long in redirector log file \"%s\"\n"),wentp
);
118 } else if(strcmp(leks
,"ip") == 0) {
119 if (strlen(res
)>=sizeof(ip
)) {
120 debuga(__FILE__
,__LINE__
,_("IP address too long in redirector log file \"%s\"\n"),wentp
);
125 } else if(strcmp(leks
,"user") == 0) {
126 if (strlen(res
)>=sizeof(user
)) {
127 debuga(__FILE__
,__LINE__
,_("User ID too long in redirector log file \"%s\"\n"),wentp
);
132 } else if(strcmp(leks
,"url") == 0) {
134 * Don't worry about the url being truncated as we only keep the host name
137 safe_strcpy(full_url
,res
,sizeof(full_url
));
// Fixed layout parser: the date is read as year-mon-day followed by a space.
// Presumably this is the branch used when RedirectorLogFormat is empty (the
// line joining the two branches is not visible here).
142 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
143 getword_atoll(&lday
,&gwarea
,' ')<0) {
144 debuga(__FILE__
,__LINE__
,_("Invalid date in file \"%s\"\n"),wentp
);
151 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
152 debuga(__FILE__
,__LINE__
,_("Invalid time in file \"%s\"\n"),wentp
);
// Skip everything up to the opening parenthesis then read source/list/.
156 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
157 debuga(__FILE__
,__LINE__
,_("Invalid redirected source in file \"%s\"\n"),wentp
);
161 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
162 debuga(__FILE__
,__LINE__
,_("Invalid redirected list in file \"%s\"\n"),wentp
);
166 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(full_url
,sizeof(full_url
),&gwarea
,' ')<0) {
167 debuga(__FILE__
,__LINE__
,_("Invalid url in file \"%s\"\n"),wentp
);
171 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
172 debuga(__FILE__
,__LINE__
,_("Invalid source IP in file \"%s\"\n"),wentp
);
176 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
177 debuga(__FILE__
,__LINE__
,_("Invalid user in file \"%s\"\n"),wentp
);
182 url
=process_url(full_url
,false);
184 //sprintf(warea,"%04d%02d%02d",year,mon,day);
// Encode the date as YYYYMMDD to compare it with the requested range.
186 if (RedirectorFilterOutDate
) {
187 idata
= year
*10000+mon
*100+day
;
188 if(idata
< dfrom
|| idata
> duntil
)
// Cut StripUserSuffix from the end of the user name (case insensitive match).
192 if (StripSuffixLen
>0)
195 if (x
>StripSuffixLen
&& strcasecmp(user
+(x
-StripSuffixLen
),StripUserSuffix
)==0)
196 user
[x
-StripSuffixLen
]='\0';
// An empty user name, or one reduced to "-" or a single space, is handled
// according to RecordsWithoutUser.
203 if (user
[0]=='\0' || (user
[1]=='\0' && (user
[0]=='-' || user
[0]==' '))) {
204 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
208 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
// NOTE(review): the strcpy below is unbounded. It is safe only if
// MAX_USER_LEN is at least 10. Confirm.
210 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
211 strcpy(user
,"everybody");
// Look the user up and, presumably only when it is unknown, create it with
// no_report set.
214 uinfo
=userinfo_find_from_id(user
);
216 uinfo
=userinfo_create(user
,(id_is_ip
) ? NULL
: ip
);
217 uinfo
->no_report
=true;
218 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
219 user_find(userlabel
,MAX_USER_LEN
, user
);
220 userinfo_label(uinfo
,userlabel
);
// Record layout: user id, date as YYYYMMDD, time, ip, url then the rule. The
// rule is "source/list" when both are set. The conditions selecting the two
// other forms are not visible here.
222 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
223 if (source
[0] && list
[0])
224 fprintf(fp_ou
,"%s/%s\n",source
,list
);
226 fprintf(fp_ou
,"%s\n",source
);
228 fprintf(fp_ou
,"%s\n",list
);
//! Read one redirector log file and pass each of its lines to parse_log().
//!
//! \param wentp The name of the log file to read.
//! \param fp_ou The stream passed to parse_log() to receive the records.
//! \param dfrom First date passed unchanged to parse_log().
//! \param duntil Last date passed unchanged to parse_log().
//!
//! A file whose name is already stored in ::files_done is not read again.
//! NOTE(review): the statements run after each error message are not visible
//! in this listing.
232 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
234 FileObject
*fp_in
= NULL
;
240 debuga(__FILE__
,__LINE__
,_("Reading redirector log file \"%s\"\n"),wentp
);
243 /* With squidGuard, you can log groups in only one log file.
244 We must parse each log files only one time. Example :
246 domainlist porn/domains
251 domainlist aggressive/domains
252 urllist aggressive/urls
256 domainlist audio-video/domains
257 urllist audio-video/urls
// Return at once if the file was already processed.
261 for (i
=0; i
<nfiles_done
; i
++)
262 if (!strcmp(wentp
, files_done
[i
])) return;
// Remember the file name in a new slot at the end of files_done.
// NOTE(review): realloc() overwrites files_done directly, so the previous
// array is lost when the call fails. Harmless only if the failure path ends
// the program. Confirm.
265 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
267 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
270 files_done
[nfiles_done
-1] = strdup(wentp
);
271 if (!files_done
[nfiles_done
-1]) {
272 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
276 if ((fp_in
=FileObject_Open(wentp
))==NULL
) {
277 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),wentp
,FileObject_GetLastOpenError());
281 if ((line
=longline_create())==NULL
) {
282 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),wentp
);
// Lines of any length are read through the longline buffer.
286 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
287 parse_log(fp_ou
,buf
,dfrom
,duntil
);
289 if (FileObject_Close(fp_in
)) {
290 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),wentp
,FileObject_GetLastCloseError());
293 longline_destroy(&line
);
//! Gather the entries of the redirector logs and sort them.
//!
//! The entries are written to "redirector.int_unsort" in the tmp directory by
//! read_log(), then sorted into "redirector.int_log" (stored in
//! ::redirector_sorted) with the external sort command.
//!
//! The log files are the RedirectorLogs entries when NRedirectorLogs is
//! positive. The files named by the "logdir" and "log" directives of
//! SquidGuardConf are presumably used only otherwise (the line joining the
//! two cases is not visible in this listing).
//! NOTE(review): the declarations of i, y, buf, str, str2, logdir, tmp6,
//! cstatus, dfrom and duntil are not visible here.
298 void redirector_log(void)
300 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
302 char guard_in
[MAXLEN
];
// Nothing to do without any source of redirector log.
315 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0) {
316 if (debugz
>=LogLevel_Process
) debugaz(__FILE__
,__LINE__
,_("No redirector logs provided to produce that kind of report\n"));
320 snprintf(guard_in
,sizeof(guard_in
),"%s/redirector.int_unsort",tmp
);
321 if((fp_ou
=fopen(guard_in
,"w"))==NULL
) {
322 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),guard_in
,strerror(errno
));
// Encode both ends of the reporting period as YYYYMMDD.
326 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
327 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
329 if (NRedirectorLogs
>0) {
330 for (i
=0 ; i
<NRedirectorLogs
; i
++)
331 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
333 if(access(SquidGuardConf
, R_OK
) != 0) {
334 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
338 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
339 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
// Scan the configuration file for the logdir and log directives.
344 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
346 if((str
=get_param_value("logdir",buf
))!=NULL
) {
348 We want to tolerate spaces inside the directory name but we must also
349 remove the trailing spaces left by the editor after the directory name.
350 This should not be a problem as nobody use a file name with trailing spaces.
352 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
// NOTE(review): sizeof() is unsigned. If y is a signed int holding -1 (empty
// or all blank value), it is converted to a huge unsigned value, the test
// below is true and y becomes sizeof(logdir)-2 instead of staying at -1.
// The declaration of y is not visible here. Confirm and compare as signed.
353 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
359 } else if((str
=get_param_value("log",buf
))!=NULL
) {
360 if((str2
=get_param_value("anonymous",str
))!=NULL
)
364 If logdir is defined, we prepend it to the log file name, otherwise, we assume
365 the log directive provides an absolute file name to the log file. Therefore,
366 we don't need to add an additionnal / at the beginning of the log file name.
// NOTE(review): this sprintf() is unbounded. It is safe only if wentp is
// larger than logdir plus the slash. Confirm both sizes.
368 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
370 Spaces are allowed in the name of the log file. The file name ends at the first #
371 because it is assumed it is an end of line comment. Any space before the # is then
372 removed. Any control character (i.e. a character with a code lower than 32) ends
373 the file name. That includes the terminating zero.
375 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
379 while(*str
==' ' && y
>0) {
385 read_log(wentp
,fp_ou
,dfrom
,duntil
);
388 if (fclose(fp_guard
)==EOF
) {
389 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
394 if (fp_ou
&& fclose(fp_ou
)==EOF
) {
395 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),guard_in
,strerror(errno
));
// Release the names recorded by read_log(). free() accepts a NULL pointer so
// the test is only a precaution.
400 for (y
=0; y
<nfiles_done
; y
++)
401 if (files_done
[y
]) free(files_done
[y
]);
// Sort the entries only if at least one was counted.
405 if (redirector_count
) {
406 snprintf(redirector_sorted
,sizeof(redirector_sorted
),"%s/redirector.int_log",tmp
);
408 debuga(__FILE__
,__LINE__
,_("Sorting file \"%s\"\n"),redirector_sorted
);
// Sort on the user (field 1), the date (field 2) and field 4 of the tab
// separated records.
// NOTE(review): the file names are placed between double quotes in a command
// run by system(). A temporary directory name containing a double quote, a
// dollar sign or a backquote would alter the command.
411 if (snprintf(tmp6
,sizeof(tmp6
),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, redirector_sorted
)>=sizeof(tmp6
)) {
412 debuga(__FILE__
,__LINE__
,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in
,redirector_sorted
);
415 cstatus
=system(tmp6
);
// NOTE(review): WEXITSTATUS() is only meaningful when WIFEXITED() is true.
// The status reported below is misleading when sort was ended by a signal.
416 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
417 debuga(__FILE__
,__LINE__
,_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
418 debuga(__FILE__
,__LINE__
,_("sort command: %s\n"),tmp6
);
// The unsorted file is deleted unless KeepTempLog is set.
423 if (!KeepTempLog
&& unlink(guard_in
)) {
424 debuga(__FILE__
,__LINE__
,_("Cannot delete \"%s\": %s\n"),guard_in
,strerror(errno
));
//! Write the table row telling how many redirector entries are not listed.
//!
//! \param fp_ou The HTML file of the report.
//! \param count The number of entries left out. It also selects the singular
//! or plural form of the message through ngettext().
//!
//! The message is printed straight to the stream instead of being formatted
//! in a fixed size buffer first. A long translation can therefore not be
//! truncated, which could otherwise split a multi-byte character and leave
//! an invalid sequence in the page.
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	fprintf(fp_ou,ngettext("%d more redirector entry not shown here…","%d more redirector entries not shown here…",count),count);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
//! Write the HTML report of the redirector entries.
//!
//! The sorted entries are read from ::redirector_sorted and the page is
//! written to "redirector.html" in outdirname. Each record holds, separated
//! by tabs, the user id, the date, the time, the IP address, the url and the
//! rule. When SquidGuardReportLimit is set, the entries of a user beyond that
//! limit are counted and reported by show_ignored_redirector().
//!
//! NOTE(review): many lines of this function are missing from this listing
//! (local declarations, the statements run after each error message and the
//! end of the function). Confirm against the full file.
438 void redirector_report(void)
440 FileObject
*fp_in
= NULL
;
459 struct getwordstruct gwarea
;
460 const struct userinfostruct
*uinfo
;
// No entry was counted: only a debug message is visible here, presumably
// followed by a return.
467 if(!redirector_count
) {
468 if (debugz
>=LogLevel_Process
) {
469 if (redirector_sorted
[0])
470 debugaz(__FILE__
,__LINE__
,_("Redirector report not generated because it is empty\n"));
475 snprintf(report
,sizeof(report
),"%s/redirector.html",outdirname
);
477 if((fp_in
=FileObject_Open(redirector_sorted
))==NULL
) {
478 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),redirector_sorted
,FileObject_GetLastOpenError());
482 if((fp_ou
=fopen(report
,"w"))==NULL
) {
483 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),report
,strerror(errno
));
487 if ((line
=longline_create())==NULL
) {
488 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),redirector_sorted
);
// Page header with the period and the title, then the column headers.
492 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Redirector report"),HTML_JS_NONE
);
493 fputs("<tr><td class=\"header_c\">",fp_ou
);
494 fprintf(fp_ou
,_("Period: %s"),period
.html
);
495 fputs("</td></tr>\n",fp_ou
);
496 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
497 close_html_header(fp_ou
);
499 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou
);
500 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
// One table row per record of the sorted file.
502 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
503 getword_start(&gwarea
,buf
);
504 if (getword(user
,sizeof(user
),&gwarea
,'\t')<0) {
505 debuga(__FILE__
,__LINE__
,_("Invalid user in file \"%s\"\n"),redirector_sorted
);
508 if (getword_atoll(&data2
,&gwarea
,'\t')<0) {
509 debuga(__FILE__
,__LINE__
,_("Invalid date in file \"%s\"\n"),redirector_sorted
);
512 if (getword(hora
,sizeof(hora
),&gwarea
,'\t')<0) {
513 debuga(__FILE__
,__LINE__
,_("Invalid time in file \"%s\"\n"),redirector_sorted
);
516 if (getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
517 debuga(__FILE__
,__LINE__
,_("Invalid IP address in file \"%s\"\n"),redirector_sorted
);
520 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
521 debuga(__FILE__
,__LINE__
,_("Invalid url in file \"%s\"\n"),redirector_sorted
);
524 if (getword(rule
,sizeof(rule
),&gwarea
,'\n')<0) {
525 debuga(__FILE__
,__LINE__
,_("Invalid rule in file \"%s\"\n"),redirector_sorted
);
529 uinfo
=userinfo_find_from_id(user
);
531 debuga(__FILE__
,__LINE__
,_("Unknown user ID %s in file \"%s\"\n"),user
,redirector_sorted
);
// NOTE(review): likely bug. The month is extracted with (data2/100)%10, which
// keeps only its last digit. parse_log() writes the date as %04d%02d%02d, so
// October, November and December become months 0, 1 and 2. The expression
// should most likely be (data2/100)%100.
535 computedate(data2
/10000,(data2
/100)%10,data2
%100,&t
);
536 strftime(data
,sizeof(data
),"%x",&t
);
// Track the changes of user and of IP address between consecutive records.
543 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
547 if(strcmp(ouser
,user
) != 0) {
551 if(strcmp(oip
,ip
) != 0) {
554 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
// Count the entries of the current user. When the label changes, report the
// entries of the previous user that exceeded the limit.
559 if(SquidGuardReportLimit
) {
560 if(strcmp(ouser2
,uinfo
->label
) == 0) {
563 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
564 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
// NOTE(review): unbounded strcpy. The size of ouser2 is not visible here.
// Confirm it can hold any user label.
566 strcpy(ouser2
,uinfo
->label
);
568 if(count
> SquidGuardReportLimit
)
// The first form of the row shows the user label and the IP address, the
// second one leaves both cells empty. The condition selecting the form is
// not visible here.
// NOTE(review): label, ip and rule are written without any visible HTML
// escaping. Confirm they are sanitized before reaching this point.
573 fprintf(fp_ou
,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo
->label
,ip
);
575 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou
);
576 fprintf(fp_ou
,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data
,hora
);
577 output_html_link(fp_ou
,url
,100);
578 fprintf(fp_ou
,"</td><td class=\"data2\">%s</td></tr>\n",rule
);
580 if (FileObject_Close(fp_in
)) {
581 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),redirector_sorted
,FileObject_GetLastCloseError());
584 longline_destroy(&line
);
// Report the entries left out for the last user of the file.
586 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
587 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
589 fputs("</table>\n",fp_ou
);
// Warn the reader when some log lines could not be parsed.
591 if (RedirectorErrors
>0)
593 fputs("<div class=\"warn\"><span>",fp_ou
);
594 fprintf(fp_ou
,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors
),RedirectorErrors
);
595 fputs("</span></div>\n",fp_ou
);
598 fputs("</div>\n",fp_ou
);
599 write_html_trailer(fp_ou
);
600 if (fclose(fp_ou
)==EOF
) {
601 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),report
,strerror(errno
));
// The sorted file is deleted unless KeepTempLog is set.
605 if (!KeepTempLog
&& unlink(redirector_sorted
)) {
606 debuga(__FILE__
,__LINE__
,_("Cannot delete \"%s\": %s\n"),redirector_sorted
,strerror(errno
));