]> git.ipfire.org Git - thirdparty/sarg.git/blob - redirector.c
Explain the reason when a file cannot be opened
[thirdparty/sarg.git] / redirector.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
//! The names of the redirector log files already processed by read_log(), used to read each file only once.
30 static char **files_done = NULL;
//! The number of entries stored in files_done.
31 static int nfiles_done = 0;
32
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors=0;
35 //! The file containing the sorted entries.
36 static char redirector_sorted[MAXLEN]="";
37
38 static void parse_log(FILE *fp_ou,char *buf)
39 {
40 char leks[5], sep[2], res[MAXLEN];
41 char hour[15];
42 char source[128], list[128];
43 char full_url[MAX_URL_LEN];
44 const char *url;
45 char user[MAX_USER_LEN];
46 char ip[45];
47 long long int lmon, lday, lyear;
48 int mon, day, year;
49 int idata=0;
50 bool id_is_ip;
51 struct getwordstruct gwarea;
52 struct getwordstruct gwarea1;
53 struct userinfostruct *uinfo;
54
55 getword_start(&gwarea,buf);
56 if(RedirectorLogFormat[0] != '\0') {
57 getword_start(&gwarea1,RedirectorLogFormat);
58 leks[0]='\0';
59 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
60 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
61 exit(EXIT_FAILURE);
62 }
63 year=0;
64 mon=0;
65 day=0;
66 hour[0]='\0';
67 source[0]='\0';
68 list[0]='\0';
69 ip[0]='\0';
70 user[0]='\0';
71 full_url[0]='\0';
72 while(strcmp(leks,"end") != 0) {
73 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
74 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
75 exit(EXIT_FAILURE);
76 }
77 if (getword(sep,sizeof(sep),&gwarea1,'#')<0) {
78 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
79 exit(EXIT_FAILURE);
80 }
81 if(strcmp(leks,"end") != 0) {
82 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
83 debuga(_("Parsing of tag \"%s\" in redirector log %s returned no result\n"),leks,wentp);
84 RedirectorErrors++;
85 return;
86 }
87 if(strcmp(leks,"year") == 0) {
88 year=atoi(res);
89 } else if(strcmp(leks,"mon") == 0) {
90 mon=atoi(res);
91 } else if(strcmp(leks,"day") == 0) {
92 day=atoi(res);
93 } else if(strcmp(leks,"hour") == 0) {
94 if (strlen(res)>=sizeof(hour)) {
95 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
96 RedirectorErrors++;
97 return;
98 }
99 strcpy(hour,res);
100 } else if(strcmp(leks,"source") == 0) {
101 if (strlen(res)>=sizeof(source)) {
102 debuga(_("Banning source name too long in redirector log file %s\n"),wentp);
103 RedirectorErrors++;
104 return;
105 }
106 strcpy(source,res);
107 } else if(strcmp(leks,"list") == 0) {
108 if (strlen(res)>=sizeof(list)) {
109 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
110 RedirectorErrors++;
111 return;
112 }
113 strcpy(list,res);
114 } else if(strcmp(leks,"ip") == 0) {
115 if (strlen(res)>=sizeof(ip)) {
116 debuga(_("IP address too long in redirector log file %s\n"),wentp);
117 RedirectorErrors++;
118 return;
119 }
120 strcpy(ip,res);
121 } else if(strcmp(leks,"user") == 0) {
122 if (strlen(res)>=sizeof(user)) {
123 debuga(_("User ID too long in redirector log file %s\n"),wentp);
124 RedirectorErrors++;
125 return;
126 }
127 strcpy(user,res);
128 } else if(strcmp(leks,"url") == 0) {
129 /*
130 * Don't worry about the url being truncated as we only keep the host name
131 * any way...
132 */
133 safe_strcpy(full_url,res,sizeof(full_url));
134 }
135 }
136 }
137 } else {
138 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
139 getword_atoll(&lday,&gwarea,' ')<0) {
140 debuga(_("Invalid date found in file %s\n"),wentp);
141 RedirectorErrors++;
142 return;
143 }
144 year=(int)lyear;
145 mon=(int)lmon;
146 day=(int)lday;
147 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
148 debuga(_("Invalid time found in file %s\n"),wentp);
149 RedirectorErrors++;
150 return;
151 }
152 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
153 debuga(_("Invalid redirected source in file %s\n"),wentp);
154 RedirectorErrors++;
155 return;
156 }
157 if (getword(list,sizeof(list),&gwarea,'/')<0) {
158 debuga(_("Invalid redirected list in file %s\n"),wentp);
159 RedirectorErrors++;
160 return;
161 }
162 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(full_url,sizeof(full_url),&gwarea,' ')<0) {
163 debuga(_("Invalid URL in file %s\n"),wentp);
164 RedirectorErrors++;
165 return;
166 }
167 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
168 debuga(_("Invalid source IP in file %s\n"),wentp);
169 RedirectorErrors++;
170 return;
171 }
172 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
173 debuga(_("Invalid user in file %s\n"),wentp);
174 RedirectorErrors++;
175 return;
176 }
177 }
178 url=process_url(full_url,false);
179
180 //sprintf(warea,"%04d%02d%02d",year,mon,day);
181
182 if(RedirectorFilterOutDate) {
183 idata = year*10000+mon*100+day;
184 if(idata < dfrom || idata > duntil)
185 return;
186 }
187
188 if(UserIp) {
189 strcpy(user,ip);
190 id_is_ip=true;
191 } else {
192 id_is_ip=false;
193 if (user[0]=='\0' || (user[1]=='\0' && (user[0]=='-' || user[0]==' '))) {
194 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
195 strcpy(user,ip);
196 id_is_ip=true;
197 }
198 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
199 return;
200 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
201 strcpy(user,"everybody");
202 }
203 }
204 uinfo=userinfo_find_from_id(user);
205 if (!uinfo) {
206 uinfo=userinfo_create(user);
207 uinfo->id_is_ip=id_is_ip;
208 uinfo->no_report=true;
209 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
210 user_find(uinfo->label,MAX_USER_LEN, user);
211 }
212 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo->id,year,mon,day,hour,ip,url);
213 if (source[0] && list[0])
214 fprintf(fp_ou,"%s/%s\n",source,list);
215 else if (source[0])
216 fprintf(fp_ou,"%s\n",source);
217 else
218 fprintf(fp_ou,"%s\n",list);
219 redirector_count++;
220 }
221
222 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
223 {
224 FILE *fp_in = NULL;
225 char *buf;
226 int i;
227 longline line;
228
229 if(debug) {
230 debuga(_("Reading redirector log file %s\n"),wentp);
231 }
232
233 /* With squidGuard, you can log groups in only one log file.
234 We must parse each log files only one time. Example :
235 dest porn {
236 domainlist porn/domains
237 urllist porn/urls
238 log file1.log
239 }
240 dest aggressive {
241 domainlist aggressive/domains
242 urllist aggressive/urls
243 log file2.log
244 }
245 dest audio-video {
246 domainlist audio-video/domains
247 urllist audio-video/urls
248 log file1.log
249 }
250 */
251 for (i=0; i<nfiles_done; i++)
252 if (!strcmp(wentp, files_done[i])) return;
253
254 nfiles_done++;
255 files_done = realloc(files_done, nfiles_done*sizeof(char *));
256 if (!files_done) {
257 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
258 exit(EXIT_FAILURE);
259 }
260 files_done[nfiles_done-1] = strdup(wentp);
261 if (!files_done[nfiles_done-1]) {
262 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
263 exit(EXIT_FAILURE);
264 }
265
266 if ((fp_in=fopen(wentp,"r"))==NULL) {
267 debuga(_("(squidguard) Cannot open log file %s: %s\n"),wentp,strerror(errno));
268 exit(EXIT_FAILURE);
269 }
270
271 if ((line=longline_create())==NULL) {
272 debuga(_("Not enough memory to read the redirector log\n"));
273 exit(EXIT_FAILURE);
274 }
275
276 while ((buf=longline_read(fp_in,line)) != NULL) {
277 parse_log(fp_ou,buf);
278 }
279 fclose(fp_in);
280 longline_destroy(&line);
281 return;
282 }
283
284
285 void redirector_log(void)
286 {
287 FILE *fp_ou = NULL, *fp_guard = NULL;
288 char buf[MAXLEN];
289 char guard_in[MAXLEN];
290 char logdir[MAXLEN];
291 char user[MAXLEN];
292 char tmp6[MAXLEN];
293 int i;
294 int y;
295 int cstatus;
296 int dfrom, duntil;
297 char *str;
298 char *str2;
299
300 str2 = user;
301
302 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0) {
303 if (debugz) debugaz(_("No redirector logs provided to produce that kind of report\n"));
304 return;
305 }
306
307 snprintf(guard_in,sizeof(guard_in),"%s/redirector.int_unsort",tmp);
308 if((fp_ou=fopen(guard_in,"w"))==NULL) {
309 debuga(_("(squidguard) Cannot open log file %s: %s\n"),guard_in,strerror(errno));
310 exit(EXIT_FAILURE);
311 }
312
313 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
314 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
315
316 if (NRedirectorLogs>0) {
317 for (i=0 ; i<NRedirectorLogs ; i++)
318 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
319 } else {
320 if(access(SquidGuardConf, R_OK) != 0) {
321 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
322 exit(EXIT_FAILURE);
323 }
324
325 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
326 debuga(_("(squidguard) Cannot open log file %s: %s\n"),SquidGuardConf,strerror(errno));
327 exit(EXIT_FAILURE);
328 }
329
330 logdir[0]=0;
331 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
332 fixendofline(buf);
333 if((str=get_param_value("logdir",buf))!=NULL) {
334 /*
335 We want to tolerate spaces inside the directory name but we must also
336 remove the trailing spaces left by the editor after the directory name.
337 This should not be a problem as nobody use a file name with trailing spaces.
338 */
339 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
340 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
341 logdir[y+1] = '\0';
342 while (y>=0) {
343 logdir[y] = str[y];
344 y--;
345 }
346 } else if((str=get_param_value("log",buf))!=NULL) {
347 if((str2=get_param_value("anonymous",str))!=NULL)
348 str=str2;
349
350 /*
351 If logdir is defined, we prepend it to the log file name, otherwise, we assume
352 the log directive provides an absolute file name to the log file. Therefore,
353 we don't need to add an additionnal / at the beginning of the log file name.
354 */
355 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
356 /*
357 Spaces are allowed in the name of the log file. The file name ends at the first #
358 because it is assumed it is an end of line comment. Any space before the # is then
359 removed. Any control character (i.e. a character with a code lower than 32) ends
360 the file name. That includes the terminating zero.
361 */
362 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
363 wentp[y++]=*str++;
364 if(*str=='#') {
365 str--;
366 while(*str==' ' && y>0) {
367 str--;
368 y--;
369 }
370 }
371 wentp[y]=0;
372 read_log(wentp,fp_ou,dfrom,duntil);
373 }
374 }
375 }
376
377 if (fp_guard) fclose(fp_guard);
378 if (fp_ou) fclose(fp_ou);
379
380 if (files_done) {
381 for (y=0; y<nfiles_done; y++)
382 if (files_done[y]) free(files_done[y]);
383 free(files_done);
384 }
385
386 if (redirector_count) {
387 snprintf(redirector_sorted,sizeof(redirector_sorted),"%s/redirector.int_log",tmp);
388 if(debug) {
389 debuga(_("Sorting file: %s\n"),redirector_sorted);
390 }
391
392 if (snprintf(tmp6,sizeof(tmp6),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, redirector_sorted)>=sizeof(tmp6)) {
393 debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in,redirector_sorted);
394 exit(EXIT_FAILURE);
395 }
396 cstatus=system(tmp6);
397 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
398 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
399 debuga(_("sort command: %s\n"),tmp6);
400 exit(EXIT_FAILURE);
401 }
402 }
403
404 if (!KeepTempLog && unlink(guard_in)) {
405 debuga(_("Cannot delete \"%s\": %s\n"),guard_in,strerror(errno));
406 exit(EXIT_FAILURE);
407 }
408 return;
409 }
410
/*!
Write the table row telling how many entries of a user were left out of the
report because of the limit on the number of entries per user.

The message is written directly to the output file so that a long translation
is never truncated by an intermediate fixed size buffer.

\param fp_ou The HTML file of the report.
\param count The number of entries not shown.
*/
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	fprintf(fp_ou,ngettext("%d more redirector entry not shown here&hellip;","%d more redirector entries not shown here&hellip;",count),count);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
418
419 void redirector_report(void)
420 {
421 FILE *fp_in = NULL, *fp_ou = NULL;
422
423 char *buf;
424 char *url;
425 char report[MAXLEN];
426 char ip[45];
427 char rule[255];
428 char oip[45];
429 char user[MAXLEN];
430 char ouser[MAXLEN];
431 char data[15];
432 char hora[15];
433 char ouser2[255];
434 char oname[MAXLEN];
435 bool z=false;
436 int count=0;
437 long long int data2;
438 bool new_user;
439 struct getwordstruct gwarea;
440 const struct userinfostruct *uinfo;
441 struct tm t;
442 longline line;
443
444 ouser[0]='\0';
445 ouser2[0]='\0';
446
447 if(!redirector_count) {
448 if (debugz) {
449 if (redirector_sorted[0])
450 debugaz(_("Redirector report not generated because it is empty\n"));
451 }
452 return;
453 }
454
455 snprintf(report,sizeof(report),"%s/redirector.html",outdirname);
456
457 if((fp_in=fopen(redirector_sorted,"r"))==NULL) {
458 debuga(_("(squidguard) Cannot open log file %s: %s\n"),redirector_sorted,strerror(errno));
459 exit(EXIT_FAILURE);
460 }
461
462 if((fp_ou=fopen(report,"w"))==NULL) {
463 debuga(_("(squidguard) Cannot open log file %s: %s\n"),report,strerror(errno));
464 exit(EXIT_FAILURE);
465 }
466
467 if ((line=longline_create())==NULL) {
468 debuga(_("Not enough memory to read the processed redirector log\n"));
469 exit(EXIT_FAILURE);
470 }
471
472 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Redirector report"),HTML_JS_NONE);
473 fputs("<tr><td class=\"header_c\">",fp_ou);
474 fprintf(fp_ou,_("Period: %s"),period.html);
475 fputs("</td></tr>\n",fp_ou);
476 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
477 close_html_header(fp_ou);
478
479 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou);
480 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
481
482 while((buf=longline_read(fp_in,line))!=NULL) {
483 getword_start(&gwarea,buf);
484 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
485 debuga(_("Invalid user in file %s\n"),redirector_sorted);
486 exit(EXIT_FAILURE);
487 }
488 if (getword_atoll(&data2,&gwarea,'\t')<0) {
489 debuga(_("Invalid date in file %s\n"),redirector_sorted);
490 exit(EXIT_FAILURE);
491 }
492 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
493 debuga(_("Invalid time in file %s\n"),redirector_sorted);
494 exit(EXIT_FAILURE);
495 }
496 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
497 debuga(_("Invalid IP address in file %s\n"),redirector_sorted);
498 exit(EXIT_FAILURE);
499 }
500 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
501 debuga(_("Invalid URL in file %s\n"),redirector_sorted);
502 exit(EXIT_FAILURE);
503 }
504 if (getword(rule,sizeof(rule),&gwarea,'\n')<0) {
505 debuga(_("Invalid rule in file %s\n"),redirector_sorted);
506 exit(EXIT_FAILURE);
507 }
508
509 uinfo=userinfo_find_from_id(user);
510 if (!uinfo) {
511 debuga(_("Unknown user ID %s in file %s\n"),user,redirector_sorted);
512 exit(EXIT_FAILURE);
513 }
514
515 computedate(data2/10000,(data2/100)%10,data2%100,&t);
516 strftime(data,sizeof(data),"%x",&t);
517
518 new_user=false;
519 if(!z) {
520 strcpy(ouser,user);
521 strcpy(oip,ip);
522 strcpy(oname,ip);
523 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
524 z=true;
525 new_user=true;
526 } else {
527 if(strcmp(ouser,user) != 0) {
528 strcpy(ouser,user);
529 new_user=true;
530 }
531 if(strcmp(oip,ip) != 0) {
532 strcpy(oip,ip);
533 strcpy(oname,ip);
534 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
535 new_user=true;
536 }
537 }
538
539 if(SquidGuardReportLimit) {
540 if(strcmp(ouser2,uinfo->label) == 0) {
541 count++;
542 } else {
543 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
544 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
545 count=1;
546 strcpy(ouser2,uinfo->label);
547 }
548 if(count > SquidGuardReportLimit)
549 continue;
550 }
551
552 if (new_user)
553 fprintf(fp_ou,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo->label,ip);
554 else
555 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou);
556 fprintf(fp_ou,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data,hora);
557 output_html_link(fp_ou,url,100);
558 fprintf(fp_ou,"</td><td class=\"data2\">%s</td></tr>\n",rule);
559 }
560 fclose(fp_in);
561 longline_destroy(&line);
562
563 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
564 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
565
566 fputs("</table>\n",fp_ou);
567
568 if (RedirectorErrors>0)
569 {
570 fputs("<div class=\"warn\"><span>",fp_ou);
571 fprintf(fp_ou,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors),RedirectorErrors);
572 fputs("</span></div>\n",fp_ou);
573 }
574
575 fputs("</div>\n",fp_ou);
576 if (write_html_trailer(fp_ou)<0)
577 debuga(_("Write error in file %s\n"),report);
578 if (fclose(fp_ou)==EOF)
579 debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));
580
581 if (!KeepTempLog && unlink(redirector_sorted)) {
582 debuga(_("Cannot delete \"%s\": %s\n"),redirector_sorted,strerror(errno));
583 exit(EXIT_FAILURE);
584 }
585
586 return;
587 }