2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
//! Names of the redirector log files already read. Each entry is a strdup'ed copy
//! added by read_log() and released by redirector_log().
30 static char **files_done
= NULL
;
//! Number of entries stored in files_done.
31 static int nfiles_done
= 0;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
=0;
35 //! The file containing the sorted entries.
36 static char redirector_sorted
[MAXLEN
]="";
37 //! Length of the suffix to strip from the user name.
//! It is refreshed by read_log() from StripUserSuffix before the lines are parsed.
38 static int StripSuffixLen
;
//! The suffix to strip from the user name. It is only declared here.
40 extern char StripUserSuffix
[MAX_USER_LEN
];
//! Parse one line of a redirector log and append the extracted entry to the output file.
//!
//! When RedirectorLogFormat is not empty, the line is split according to the tags of
//! that format. The later getword calls read a fixed sequence of fields (date, time,
//! source/list, url, ip, user); presumably that is the alternative branch, but the
//! branching lines are not visible in this extract.
//!
//! \param fp_ou The file receiving the tab separated entry.
//! \param buf The log line to parse.
//! \param dfrom The first date to keep, encoded as year*10000+month*100+day.
//! \param duntil The last date to keep, with the same encoding.
42 static void parse_log(FILE *fp_ou
,char *buf
,int dfrom
,int duntil
)
44 char leks
[5], sep
[2], res
[MAXLEN
];
46 char source
[128], list
[128];
47 char full_url
[MAX_URL_LEN
];
49 char user
[MAX_USER_LEN
];
51 char userlabel
[MAX_USER_LEN
];
52 long long int lmon
, lday
, lyear
;
56 struct getwordstruct gwarea
;
57 struct getwordstruct gwarea1
;
58 struct userinfostruct
*uinfo
;
// gwarea walks the log line.
60 getword_start(&gwarea
,buf
);
// A user supplied format drives the parsing when it is not empty.
61 if(RedirectorLogFormat
[0] != '\0') {
// gwarea1 walks the format string.
62 getword_start(&gwarea1
,RedirectorLogFormat
);
// Consume the text preceding the first tag of the format.
64 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
65 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
// Process the tags of the format one after the other until the "end" tag.
77 while(strcmp(leks
,"end") != 0) {
// Name of the current tag.
78 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
79 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
// Text following the tag. Only its first character is used as the column separator.
82 if (getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
83 debuga(__FILE__
,__LINE__
,_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
// Any tag but "end" has a value to extract from the log line.
86 if(strcmp(leks
,"end") != 0) {
87 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
88 debuga(__FILE__
,__LINE__
,_("Parsing of tag \"%s\" in redirector log \"%s\" returned no result\n"),leks
,wentp
);
// Dispatch on the tag name. Each value is checked against the size of its destination.
92 if(strcmp(leks
,"year") == 0) {
94 } else if(strcmp(leks
,"mon") == 0) {
96 } else if(strcmp(leks
,"day") == 0) {
98 } else if(strcmp(leks
,"hour") == 0) {
99 if (strlen(res
)>=sizeof(hour
)) {
100 debuga(__FILE__
,__LINE__
,_("Hour string too long in redirector log file \"%s\"\n"),wentp
);
105 } else if(strcmp(leks
,"source") == 0) {
106 if (strlen(res
)>=sizeof(source
)) {
107 debuga(__FILE__
,__LINE__
,_("Banning source name too long in redirector log file \"%s\"\n"),wentp
);
112 } else if(strcmp(leks
,"list") == 0) {
113 if (strlen(res
)>=sizeof(list
)) {
114 debuga(__FILE__
,__LINE__
,_("Banning list name too long in redirector log file \"%s\"\n"),wentp
);
119 } else if(strcmp(leks
,"ip") == 0) {
120 if (strlen(res
)>=sizeof(ip
)) {
121 debuga(__FILE__
,__LINE__
,_("IP address too long in redirector log file \"%s\"\n"),wentp
);
126 } else if(strcmp(leks
,"user") == 0) {
127 if (strlen(res
)>=sizeof(user
)) {
128 debuga(__FILE__
,__LINE__
,_("User ID too long in redirector log file \"%s\"\n"),wentp
);
133 } else if(strcmp(leks
,"url") == 0) {
135 * Don't worry about the url being truncated as we only keep the host name
138 safe_strcpy(full_url
,res
,sizeof(full_url
));
// Fixed layout: the date is written as year-month-day followed by a space.
143 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
144 getword_atoll(&lday
,&gwarea
,' ')<0) {
145 debuga(__FILE__
,__LINE__
,_("Invalid date in file \"%s\"\n"),wentp
);
// The time follows, up to the next space.
152 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
153 debuga(__FILE__
,__LINE__
,_("Invalid time in file \"%s\"\n"),wentp
);
// Skip up to the opening parenthesis then read the source up to the first slash.
157 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
158 debuga(__FILE__
,__LINE__
,_("Invalid redirected source in file \"%s\"\n"),wentp
);
// The list name runs up to the next slash.
162 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
163 debuga(__FILE__
,__LINE__
,_("Invalid redirected list in file \"%s\"\n"),wentp
);
// Skip one space delimited field then read the url.
167 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(full_url
,sizeof(full_url
),&gwarea
,' ')<0) {
168 debuga(__FILE__
,__LINE__
,_("Invalid url in file \"%s\"\n"),wentp
);
// The client address runs up to the next slash.
172 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
173 debuga(__FILE__
,__LINE__
,_("Invalid source IP in file \"%s\"\n"),wentp
);
// Skip the rest of that field then read the user name.
177 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
178 debuga(__FILE__
,__LINE__
,_("Invalid user in file \"%s\"\n"),wentp
);
// The value stored in the output file is the result of process_url() on the full url.
183 url
=process_url(full_url
,false);
185 //sprintf(warea,"%04d%02d%02d",year,mon,day);
// Optional date filter. The entry date is compared with the inclusive range dfrom..duntil.
187 if (RedirectorFilterOutDate
) {
188 idata
= year
*10000+mon
*100+day
;
189 if(idata
< dfrom
|| idata
> duntil
)
// Strip the configured suffix from the user name (case insensitive comparison).
// x is assigned on a line not shown in this extract, presumably the length of user.
193 if (StripSuffixLen
>0)
196 if (x
>StripSuffixLen
&& strcasecmp(user
+(x
-StripSuffixLen
),StripUserSuffix
)==0)
197 user
[x
-StripSuffixLen
]='\0';
// An empty user, "-" or a single space means the record has no user name.
// RecordsWithoutUser selects what to do with such a record.
204 if (user
[0]=='\0' || (user
[1]=='\0' && (user
[0]=='-' || user
[0]==' '))) {
205 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
209 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
211 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
212 strcpy(user
,"everybody");
// Look the user up. The creation below is presumably reserved to unknown users
// (the test is not visible here); such a user is flagged with no_report.
215 uinfo
=userinfo_find_from_id(user
);
217 uinfo
=userinfo_create(user
,(id_is_ip
) ? NULL
: ip
);
218 uinfo
->no_report
=true;
219 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
220 user_find(userlabel
,MAX_USER_LEN
, user
);
221 userinfo_label(uinfo
,userlabel
);
// Write the entry: user id, date, hour, ip and url separated by tabs...
223 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
// ...followed by the rule: "source/list" when both are known, else one of them
// (the conditions selecting the last two forms are not visible here).
224 if (source
[0] && list
[0])
225 fprintf(fp_ou
,"%s/%s\n",source
,list
);
227 fprintf(fp_ou
,"%s\n",source
);
229 fprintf(fp_ou
,"%s\n",list
);
//! Read one redirector log file and pass each of its lines to parse_log().
//!
//! The name of the file is compared with the names stored in files_done and the
//! function returns immediately if the file was already read.
//!
//! \param wentp The name of the log file to read.
//! \param fp_ou The file receiving the parsed entries.
//! \param dfrom The first date to keep. It is passed unchanged to parse_log().
//! \param duntil The last date to keep. It is passed unchanged to parse_log().
233 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
235 FileObject
*fp_in
= NULL
;
241 debuga(__FILE__
,__LINE__
,_("Reading redirector log file \"%s\"\n"),wentp
);
244 /* With squidGuard, you can log groups in only one log file.
245 We must parse each log files only one time. Example :
247 domainlist porn/domains
252 domainlist aggressive/domains
253 urllist aggressive/urls
257 domainlist audio-video/domains
258 urllist audio-video/urls
// Nothing to do if this file name is already in the list of the files read.
262 for (i
=0; i
<nfiles_done
; i
++)
263 if (!strcmp(wentp
, files_done
[i
])) return;
// Make room for the new name in files_done.
// NOTE(review): the result of realloc() overwrites files_done directly. That is only
// harmless if the failure path terminates the program, which is not visible here - confirm.
266 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
268 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
// Store a private copy of the name in the last slot.
271 files_done
[nfiles_done
-1] = strdup(wentp
);
272 if (!files_done
[nfiles_done
-1]) {
273 debuga(__FILE__
,__LINE__
,_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
277 if ((fp_in
=FileObject_Open(wentp
))==NULL
) {
278 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),wentp
,FileObject_GetLastOpenError());
// The lines are read through a longline object.
282 if ((line
=longline_create())==NULL
) {
283 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),wentp
);
// Compute the length of the suffix once for all the lines parsed by parse_log().
286 StripSuffixLen
=strlen(StripUserSuffix
);
// Parse the file line by line.
288 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
289 parse_log(fp_ou
,buf
,dfrom
,duntil
);
// A failure to close the input file is reported as a read error.
291 if (FileObject_Close(fp_in
)) {
292 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),wentp
,FileObject_GetLastCloseError());
295 longline_destroy(&line
);
//! Read the redirector logs and produce the sorted intermediate file used by the report.
//!
//! The entries are first written to "redirector.int_unsort" in the temporary directory.
//! The logs are either those listed in RedirectorLogs or those found through the "logdir"
//! and "log" parameters of the squidGuard configuration file. When redirector_count is
//! not zero, the entries are then sorted with the external sort command into
//! "redirector.int_log". The unsorted file is deleted unless KeepTempLog is set.
300 void redirector_log(void)
302 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
304 char guard_in
[MAXLEN
];
// Nothing to read if neither a squidGuard configuration nor a log file is configured.
317 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0) {
318 if (debugz
>=LogLevel_Process
) debugaz(__FILE__
,__LINE__
,_("No redirector logs provided to produce that kind of report\n"));
// Create the unsorted intermediate file.
322 snprintf(guard_in
,sizeof(guard_in
),"%s/redirector.int_unsort",tmp
);
323 if((fp_ou
=fopen(guard_in
,"w"))==NULL
) {
324 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),guard_in
,strerror(errno
));
// Encode both ends of the reporting period as year*10000+month*100+day.
328 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
329 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
// Read the explicitly configured log files when there are some.
331 if (NRedirectorLogs
>0) {
332 for (i
=0 ; i
<NRedirectorLogs
; i
++)
333 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
// The log files are found in the squidGuard configuration file, which must be readable.
335 if(access(SquidGuardConf
, R_OK
) != 0) {
336 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
340 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
341 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
// Scan the configuration file line by line.
346 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
348 if((str
=get_param_value("logdir",buf
))!=NULL
) {
350 We want to tolerate spaces inside the directory name but we must also
351 remove the trailing spaces left by the editor after the directory name.
352 This should not be a problem as nobody use a file name with trailing spaces.
// y ends on the last character greater than a space or on -1 if there is none.
354 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
// Limit y to the capacity of logdir. The cast keeps the comparison signed: without it,
// y==-1 (empty value) is converted to a huge unsigned number, passes the test and is
// replaced by the largest index instead of being left alone.
355 if (y
>=(int)sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
// A "log" parameter names a log file. The statement run when the "anonymous" keyword
// is found is on a line not shown in this extract.
361 } else if((str
=get_param_value("log",buf
))!=NULL
) {
362 if((str2
=get_param_value("anonymous",str
))!=NULL
)
366 If logdir is defined, we prepend it to the log file name, otherwise, we assume
367 the log directive provides an absolute file name to the log file. Therefore,
368 we don't need to add an additionnal / at the beginning of the log file name.
// y is the number of characters already stored in wentp.
370 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
372 Spaces are allowed in the name of the log file. The file name ends at the first #
373 because it is assumed it is an end of line comment. Any space before the # is then
374 removed. Any control character (i.e. a character with a code lower than 32) ends
375 the file name. That includes the terminating zero.
377 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
381 while(*str
==' ' && y
>0) {
// Read the log file whose name has just been built.
387 read_log(wentp
,fp_ou
,dfrom
,duntil
);
390 if (fclose(fp_guard
)==EOF
) {
391 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),SquidGuardConf
,strerror(errno
));
// Closing the output file flushes it, hence the write error.
396 if (fp_ou
&& fclose(fp_ou
)==EOF
) {
397 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),guard_in
,strerror(errno
));
// Release the names of the log files recorded by read_log().
402 for (y
=0; y
<nfiles_done
; y
++)
403 if (files_done
[y
]) free(files_done
[y
]);
// Sort the entries only if some were found.
407 if (redirector_count
) {
408 snprintf(redirector_sorted
,sizeof(redirector_sorted
),"%s/redirector.int_log",tmp
);
410 debuga(__FILE__
,__LINE__
,_("Sorting file \"%s\"\n"),redirector_sorted
);
// Sort on the first column, then the second, then the fourth of the tab separated file.
413 if (snprintf(tmp6
,sizeof(tmp6
),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, redirector_sorted
)>=sizeof(tmp6
)) {
414 debuga(__FILE__
,__LINE__
,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in
,redirector_sorted
);
417 cstatus
=system(tmp6
);
418 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
419 debuga(__FILE__
,__LINE__
,_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
420 debuga(__FILE__
,__LINE__
,_("sort command: %s\n"),tmp6
);
// The unsorted file is not needed any more.
425 if (!KeepTempLog
&& unlink(guard_in
)) {
426 debuga(__FILE__
,__LINE__
,_("Cannot delete \"%s\": %s\n"),guard_in
,strerror(errno
));
//! Write the table row telling the reader that some redirector entries are not listed.
//!
//! The message is written straight to the report, like the error count at the end of
//! redirector_report(), so a long translation cannot be truncated by an intermediate buffer.
//!
//! \param fp_ou The report file being written.
//! \param count The number of entries left out of the report.
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	// The first three columns of the row are empty.
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	// ngettext selects the singular or plural form according to count.
	fprintf(fp_ou,ngettext("%d more redirector entry not shown here…","%d more redirector entries not shown here…",count),count);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
//! Produce the "redirector.html" report from the sorted redirector entries.
//!
//! Nothing is produced when redirector_count is zero. Otherwise each line of the file
//! named by redirector_sorted becomes a row of an HTML table. SquidGuardReportLimit,
//! when set, limits the number of rows per user label; the rows left out are summed up
//! by show_ignored_redirector(). The sorted file is deleted unless KeepTempLog is set.
440 void redirector_report(void)
442 FileObject
*fp_in
= NULL
;
461 struct getwordstruct gwarea
;
462 const struct userinfostruct
*uinfo
;
// No entry was found: tell it, at the proper debug level, if a sorted file was expected.
469 if(!redirector_count
) {
470 if (debugz
>=LogLevel_Process
) {
471 if (redirector_sorted
[0])
472 debugaz(__FILE__
,__LINE__
,_("Redirector report not generated because it is empty\n"));
477 snprintf(report
,sizeof(report
),"%s/redirector.html",outdirname
);
479 if((fp_in
=FileObject_Open(redirector_sorted
))==NULL
) {
480 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),redirector_sorted
,FileObject_GetLastOpenError());
484 if((fp_ou
=fopen(report
,"w"))==NULL
) {
485 debuga(__FILE__
,__LINE__
,_("Cannot open file \"%s\": %s\n"),report
,strerror(errno
));
489 if ((line
=longline_create())==NULL
) {
490 debuga(__FILE__
,__LINE__
,_("Not enough memory to read file \"%s\"\n"),redirector_sorted
);
// Page header: title and period of the report.
494 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Redirector report"),HTML_JS_NONE
);
495 fputs("<tr><td class=\"header_c\">",fp_ou
);
496 fprintf(fp_ou
,_("Period: %s"),period
.html
);
497 fputs("</td></tr>\n",fp_ou
);
498 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
499 close_html_header(fp_ou
);
// Table with one column per field of an entry.
501 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou
);
502 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
// Each line holds the tab separated fields: user, date, time, ip, url and rule.
504 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
505 getword_start(&gwarea
,buf
);
506 if (getword(user
,sizeof(user
),&gwarea
,'\t')<0) {
507 debuga(__FILE__
,__LINE__
,_("Invalid user in file \"%s\"\n"),redirector_sorted
);
510 if (getword_atoll(&data2
,&gwarea
,'\t')<0) {
511 debuga(__FILE__
,__LINE__
,_("Invalid date in file \"%s\"\n"),redirector_sorted
);
514 if (getword(hora
,sizeof(hora
),&gwarea
,'\t')<0) {
515 debuga(__FILE__
,__LINE__
,_("Invalid time in file \"%s\"\n"),redirector_sorted
);
518 if (getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
519 debuga(__FILE__
,__LINE__
,_("Invalid IP address in file \"%s\"\n"),redirector_sorted
);
522 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
523 debuga(__FILE__
,__LINE__
,_("Invalid url in file \"%s\"\n"),redirector_sorted
);
526 if (getword(rule
,sizeof(rule
),&gwarea
,'\n')<0) {
527 debuga(__FILE__
,__LINE__
,_("Invalid rule in file \"%s\"\n"),redirector_sorted
);
// The user must be known; the message below reports the opposite case.
531 uinfo
=userinfo_find_from_id(user
);
533 debuga(__FILE__
,__LINE__
,_("Unknown user ID %s in file \"%s\"\n"),user
,redirector_sorted
);
// The date is stored as year*10000+month*100+day. The month uses two digits so it
// must be extracted modulo 100: modulo 10 turned October..December into 0..2.
537 computedate(data2
/10000,(data2
/100)%100,data2
%100,&t
);
// Format the date according to the locale.
538 strftime(data
,sizeof(data
),"%x",&t
);
// Track the changes of user and ip from one line to the next.
// NOTE(review): oname is resolved here but the row below prints ip; the use of oname
// is not visible in this extract - confirm.
545 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
549 if(strcmp(ouser
,user
) != 0) {
553 if(strcmp(oip
,ip
) != 0) {
556 if (Ip2Name
&& !uinfo
->id_is_ip
) ip2name(oname
,sizeof(oname
));
// Optional limit on the number of rows per user label.
561 if(SquidGuardReportLimit
) {
562 if(strcmp(ouser2
,uinfo
->label
) == 0) {
// The label changed: report the rows left out for the previous label.
565 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
566 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
568 strcpy(ouser2
,uinfo
->label
);
570 if(count
> SquidGuardReportLimit
)
// The first two columns are either the user label and the ip or left empty
// (the condition choosing between the two is not visible here).
575 fprintf(fp_ou
,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo
->label
,ip
);
577 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou
);
// Date and time, link to the url then rule.
578 fprintf(fp_ou
,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data
,hora
);
579 output_html_link(fp_ou
,url
,100);
580 fprintf(fp_ou
,"</td><td class=\"data2\">%s</td></tr>\n",rule
);
// A failure to close the input file is reported as a read error.
582 if (FileObject_Close(fp_in
)) {
583 debuga(__FILE__
,__LINE__
,_("Read error in \"%s\": %s\n"),redirector_sorted
,FileObject_GetLastCloseError());
586 longline_destroy(&line
);
// Report the rows left out for the last label.
588 if(count
>SquidGuardReportLimit
&& SquidGuardReportLimit
>0)
589 show_ignored_redirector(fp_ou
,count
-SquidGuardReportLimit
);
591 fputs("</table>\n",fp_ou
);
// Warn the reader about the invalid lines counted in RedirectorErrors.
593 if (RedirectorErrors
>0)
595 fputs("<div class=\"warn\"><span>",fp_ou
);
596 fprintf(fp_ou
,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors
),RedirectorErrors
);
597 fputs("</span></div>\n",fp_ou
);
600 fputs("</div>\n",fp_ou
);
601 write_html_trailer(fp_ou
);
// Closing the report flushes it, hence the write error.
602 if (fclose(fp_ou
)==EOF
) {
603 debuga(__FILE__
,__LINE__
,_("Write error in \"%s\": %s\n"),report
,strerror(errno
));
// The sorted file is not needed any more.
607 if (!KeepTempLog
&& unlink(redirector_sorted
)) {
608 debuga(__FILE__
,__LINE__
,_("Cannot delete \"%s\": %s\n"),redirector_sorted
,strerror(errno
));