]> git.ipfire.org Git - thirdparty/sarg.git/blob - redirector.c
Merge commit '24eb624ea44bf4c82c602ce'
[thirdparty/sarg.git] / redirector.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
//! The number of invalid lines found in the redirector report.
static int RedirectorErrors = 0;

//! The number of file names stored in ::files_done.
static int nfiles_done = 0;
//! The names of the redirector log files already read, so each one is parsed only once.
static char **files_done = NULL;
35
36 static void parse_log(FILE *fp_ou,char *buf)
37 {
38 char leks[5], sep[2], res[MAXLEN];
39 char hour[15];
40 char source[128], list[128];
41 char full_url[MAX_URL_LEN];
42 const char *url;
43 char user[MAX_USER_LEN];
44 char ip[45];
45 long long int lmon, lday, lyear;
46 int mon, day, year;
47 int idata=0;
48 bool id_is_ip;
49 struct getwordstruct gwarea;
50 struct getwordstruct gwarea1;
51 struct userinfostruct *uinfo;
52
53 getword_start(&gwarea,buf);
54 if(RedirectorLogFormat[0] != '\0') {
55 getword_start(&gwarea1,RedirectorLogFormat);
56 leks[0]='\0';
57 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
58 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
59 exit(EXIT_FAILURE);
60 }
61 year=0;
62 mon=0;
63 day=0;
64 hour[0]='\0';
65 source[0]='\0';
66 list[0]='\0';
67 ip[0]='\0';
68 user[0]='\0';
69 full_url[0]='\0';
70 while(strcmp(leks,"end") != 0) {
71 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
72 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
73 exit(EXIT_FAILURE);
74 }
75 if (getword(sep,sizeof(sep),&gwarea1,'#')<0) {
76 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
77 exit(EXIT_FAILURE);
78 }
79 if(strcmp(leks,"end") != 0) {
80 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
81 debuga(_("Parsing of tag \"%s\" in redirector log %s returned no result\n"),leks,wentp);
82 RedirectorErrors++;
83 return;
84 }
85 if(strcmp(leks,"year") == 0) {
86 year=atoi(res);
87 } else if(strcmp(leks,"mon") == 0) {
88 mon=atoi(res);
89 } else if(strcmp(leks,"day") == 0) {
90 day=atoi(res);
91 } else if(strcmp(leks,"hour") == 0) {
92 if (strlen(res)>=sizeof(hour)) {
93 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
94 RedirectorErrors++;
95 return;
96 }
97 strcpy(hour,res);
98 } else if(strcmp(leks,"source") == 0) {
99 if (strlen(res)>=sizeof(source)) {
100 debuga(_("Banning source name too long in redirector log file %s\n"),wentp);
101 RedirectorErrors++;
102 return;
103 }
104 strcpy(source,res);
105 } else if(strcmp(leks,"list") == 0) {
106 if (strlen(res)>=sizeof(list)) {
107 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
108 RedirectorErrors++;
109 return;
110 }
111 strcpy(list,res);
112 } else if(strcmp(leks,"ip") == 0) {
113 if (strlen(res)>=sizeof(ip)) {
114 debuga(_("IP address too long in redirector log file %s\n"),wentp);
115 RedirectorErrors++;
116 return;
117 }
118 strcpy(ip,res);
119 } else if(strcmp(leks,"user") == 0) {
120 if (strlen(res)>=sizeof(user)) {
121 debuga(_("User ID too long in redirector log file %s\n"),wentp);
122 RedirectorErrors++;
123 return;
124 }
125 strcpy(user,res);
126 } else if(strcmp(leks,"url") == 0) {
127 if (strlen(res)>=sizeof(full_url)) {
128 debuga(_("URL too long in redirector log file %s\n"),wentp);
129 RedirectorErrors++;
130 return;
131 }
132 strcpy(full_url,res);
133 }
134 }
135 }
136 } else {
137 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
138 getword_atoll(&lday,&gwarea,' ')<0) {
139 debuga(_("Invalid date found in file %s\n"),wentp);
140 RedirectorErrors++;
141 return;
142 }
143 year=(int)lyear;
144 mon=(int)lmon;
145 day=(int)lday;
146 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
147 debuga(_("Invalid time found in file %s\n"),wentp);
148 RedirectorErrors++;
149 return;
150 }
151 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
152 debuga(_("Invalid redirected source in file %s\n"),wentp);
153 RedirectorErrors++;
154 return;
155 }
156 if (getword(list,sizeof(list),&gwarea,'/')<0) {
157 debuga(_("Invalid redirected list in file %s\n"),wentp);
158 RedirectorErrors++;
159 return;
160 }
161 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(full_url,sizeof(full_url),&gwarea,' ')<0) {
162 debuga(_("Invalid URL in file %s\n"),wentp);
163 RedirectorErrors++;
164 return;
165 }
166 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
167 debuga(_("Invalid source IP in file %s\n"),wentp);
168 RedirectorErrors++;
169 return;
170 }
171 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
172 debuga(_("Invalid user in file %s\n"),wentp);
173 RedirectorErrors++;
174 return;
175 }
176 }
177 url=process_url(full_url,false);
178
179 //sprintf(warea,"%04d%02d%02d",year,mon,day);
180
181 if(RedirectorFilterOutDate) {
182 idata = year*10000+mon*100+day;
183 if(idata < dfrom || idata > duntil)
184 return;
185 }
186
187 if(UserIp) {
188 strcpy(user,ip);
189 id_is_ip=true;
190 } else {
191 id_is_ip=false;
192 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
193 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
194 strcpy(user,ip);
195 id_is_ip=true;
196 }
197 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
198 return;
199 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
200 strcpy(user,"everybody");
201 }
202 }
203 uinfo=userinfo_find_from_id(user);
204 if (!uinfo) {
205 uinfo=userinfo_create(user);
206 uinfo->id_is_ip=id_is_ip;
207 uinfo->no_report=true;
208 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
209 user_find(uinfo->label,MAX_USER_LEN, user);
210 }
211 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo->id,year,mon,day,hour,ip,url);
212 if (source[0] && list[0])
213 fprintf(fp_ou,"%s/%s\n",source,list);
214 else if (source[0])
215 fprintf(fp_ou,"%s\n",source);
216 else
217 fprintf(fp_ou,"%s\n",list);
218 redirector_count++;
219 }
220
221 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
222 {
223 FILE *fp_in = NULL;
224 char *buf;
225 int i;
226 longline line;
227
228 if(debug) {
229 debuga(_("Reading redirector log file %s\n"),wentp);
230 }
231
232 /* With squidGuard, you can log groups in only one log file.
233 We must parse each log files only one time. Example :
234 dest porn {
235 domainlist porn/domains
236 urllist porn/urls
237 log file1.log
238 }
239 dest aggressive {
240 domainlist aggressive/domains
241 urllist aggressive/urls
242 log file2.log
243 }
244 dest audio-video {
245 domainlist audio-video/domains
246 urllist audio-video/urls
247 log file1.log
248 }
249 */
250 for (i=0; i<nfiles_done; i++)
251 if (!strcmp(wentp, files_done[i])) return;
252
253 nfiles_done++;
254 files_done = realloc(files_done, nfiles_done*sizeof(char *));
255 if (!files_done) {
256 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
257 exit(EXIT_FAILURE);
258 }
259 files_done[nfiles_done-1] = strdup(wentp);
260 if (!files_done[nfiles_done-1]) {
261 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
262 exit(EXIT_FAILURE);
263 }
264
265 if ((fp_in=fopen(wentp,"r"))==NULL) {
266 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
267 exit(EXIT_FAILURE);
268 }
269
270 if ((line=longline_create())==NULL) {
271 debuga(_("Not enough memory to read the redirector log\n"));
272 exit(EXIT_FAILURE);
273 }
274
275 while ((buf=longline_read(fp_in,line)) != NULL) {
276 parse_log(fp_ou,buf);
277 }
278 fclose(fp_in);
279 longline_destroy(&line);
280 return;
281 }
282
283
284 void redirector_log(void)
285 {
286 FILE *fp_ou = NULL, *fp_guard = NULL;
287 char buf[MAXLEN];
288 char guard_in[MAXLEN];
289 char guard_ou[MAXLEN];
290 char logdir[MAXLEN];
291 char user[MAXLEN];
292 char tmp6[MAXLEN];
293 int i;
294 int y;
295 int cstatus;
296 int dfrom, duntil;
297 char *str;
298 char *str2;
299
300 str2 = user;
301
302 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0) {
303 if (debugz) debugaz(_("No redirector logs provided to produce that kind of report\n"));
304 return;
305 }
306
307 snprintf(guard_in,sizeof(guard_in),"%s/redirector.int_unsort",tmp);
308 snprintf(guard_ou,sizeof(guard_ou),"%s/redirector.int_log",tmp);
309 if((fp_ou=fopen(guard_in,"a"))==NULL) {
310 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
311 exit(EXIT_FAILURE);
312 }
313
314 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
315 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
316
317 if (NRedirectorLogs>0) {
318 for (i=0 ; i<NRedirectorLogs ; i++)
319 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
320 } else {
321 if(access(SquidGuardConf, R_OK) != 0) {
322 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
323 exit(EXIT_FAILURE);
324 }
325
326 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
327 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
328 exit(EXIT_FAILURE);
329 }
330
331 logdir[0]=0;
332 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
333 fixendofline(buf);
334 if((str=get_param_value("logdir",buf))!=NULL) {
335 /*
336 We want to tolerate spaces inside the directory name but we must also
337 remove the trailing spaces left by the editor after the directory name.
338 This should not be a problem as nobody use a file name with trailing spaces.
339 */
340 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
341 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
342 logdir[y+1] = '\0';
343 while (y>=0) {
344 logdir[y] = str[y];
345 y--;
346 }
347 } else if((str=get_param_value("log",buf))!=NULL) {
348 if((str2=get_param_value("anonymous",str))!=NULL)
349 str=str2;
350
351 /*
352 If logdir is defined, we prepend it to the log file name, otherwise, we assume
353 the log directive provides an absolute file name to the log file. Therefore,
354 we don't need to add an additionnal / at the beginning of the log file name.
355 */
356 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
357 /*
358 Spaces are allowed in the name of the log file. The file name ends at the first #
359 because it is assumed it is an end of line comment. Any space before the # is then
360 removed. Any control character (i.e. a character with a code lower than 32) ends
361 the file name. That includes the terminating zero.
362 */
363 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
364 wentp[y++]=*str++;
365 if(*str=='#') {
366 str--;
367 while(*str==' ' && y>0) {
368 str--;
369 y--;
370 }
371 }
372 wentp[y]=0;
373 read_log(wentp,fp_ou,dfrom,duntil);
374 }
375 }
376 }
377
378 if (fp_guard) fclose(fp_guard);
379 if (fp_ou) fclose(fp_ou);
380
381 if (files_done) {
382 for (y=0; y<nfiles_done; y++)
383 if (files_done[y]) free(files_done[y]);
384 free(files_done);
385 }
386
387 if(debug) {
388 debuga(_("Sorting file: %s\n"),guard_ou);
389 }
390
391 if (snprintf(tmp6,sizeof(tmp6),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou)>=sizeof(tmp6)) {
392 debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in,guard_ou);
393 exit(EXIT_FAILURE);
394 }
395 cstatus=system(tmp6);
396 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
397 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
398 debuga(_("sort command: %s\n"),tmp6);
399 exit(EXIT_FAILURE);
400 }
401
402 if (unlink(guard_in)) {
403 debuga(_("Cannot delete %s - %s\n"),guard_in,strerror(errno));
404 exit(EXIT_FAILURE);
405 }
406 return;
407 }
408
/*!
 * Write the table row telling how many redirector entries of a user were
 * left out of the report.
 *
 * The message is limited to the size of a 80 bytes buffer including the
 * terminating zero.
 *
 * \param fp_ou The HTML file to write the row to.
 * \param count The number of entries not shown.
 */
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	char message[80];
	const char *format;

	/* Pick the singular or the plural form of the message. */
	format=ngettext("%d more redirector entry not shown here&hellip;","%d more redirector entries not shown here&hellip;",count);
	snprintf(message,sizeof(message),format,count);

	/* The message goes in the fourth of the five columns of the table. */
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	fputs(message,fp_ou);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
416
417 void redirector_report(void)
418 {
419 FILE *fp_in = NULL, *fp_ou = NULL;
420
421 char *buf;
422 char *url;
423 char squidguard_in[MAXLEN];
424 char report[MAXLEN];
425 char ip[45];
426 char rule[255];
427 char oip[45];
428 char user[MAXLEN];
429 char ouser[MAXLEN];
430 char data[15];
431 char hora[15];
432 char ouser2[255];
433 char oname[MAXLEN];
434 bool z=false;
435 int count=0;
436 long long int data2;
437 bool new_user;
438 struct getwordstruct gwarea;
439 const struct userinfostruct *uinfo;
440 struct tm t;
441 longline line;
442
443 ouser[0]='\0';
444 ouser2[0]='\0';
445
446 snprintf(squidguard_in,sizeof(squidguard_in),"%s/redirector.int_log",tmp);
447 if(!redirector_count) {
448 unlink(squidguard_in);
449 if (debugz) debugaz(_("Redirector report not generated because it is empty\n"));
450 return;
451 }
452
453 snprintf(report,sizeof(report),"%s/redirector.html",outdirname);
454
455 if((fp_in=fopen(squidguard_in,"r"))==NULL) {
456 debuga(_("(squidguard) Cannot open log file %s\n"),squidguard_in);
457 exit(EXIT_FAILURE);
458 }
459
460 if((fp_ou=fopen(report,"w"))==NULL) {
461 debuga(_("(squidguard) Cannot open log file %s\n"),report);
462 exit(EXIT_FAILURE);
463 }
464
465 if ((line=longline_create())==NULL) {
466 debuga(_("Not enough memory to read the processed redirector log\n"));
467 exit(EXIT_FAILURE);
468 }
469
470 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Redirector report"),HTML_JS_NONE);
471 fputs("<tr><td class=\"header_c\">",fp_ou);
472 fprintf(fp_ou,_("Period: %s"),period.html);
473 fputs("</td></tr>\n",fp_ou);
474 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
475 close_html_header(fp_ou);
476
477 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou);
478 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
479
480 while((buf=longline_read(fp_in,line))!=NULL) {
481 getword_start(&gwarea,buf);
482 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
483 debuga(_("Invalid user in file %s\n"),squidguard_in);
484 exit(EXIT_FAILURE);
485 }
486 if (getword_atoll(&data2,&gwarea,'\t')<0) {
487 debuga(_("Invalid date in file %s\n"),squidguard_in);
488 exit(EXIT_FAILURE);
489 }
490 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
491 debuga(_("Invalid time in file %s\n"),squidguard_in);
492 exit(EXIT_FAILURE);
493 }
494 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
495 debuga(_("Invalid IP address in file %s\n"),squidguard_in);
496 exit(EXIT_FAILURE);
497 }
498 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
499 debuga(_("Invalid URL in file %s\n"),squidguard_in);
500 exit(EXIT_FAILURE);
501 }
502 if (getword(rule,sizeof(rule),&gwarea,'\n')<0) {
503 debuga(_("Invalid rule in file %s\n"),squidguard_in);
504 exit(EXIT_FAILURE);
505 }
506
507 uinfo=userinfo_find_from_id(user);
508 if (!uinfo) {
509 debuga(_("Unknown user ID %s in file %s\n"),user,squidguard_in);
510 exit(EXIT_FAILURE);
511 }
512
513 computedate(data2/10000,(data2/100)%10,data2%100,&t);
514 strftime(data,sizeof(data),"%x",&t);
515
516 new_user=false;
517 if(!z) {
518 strcpy(ouser,user);
519 strcpy(oip,ip);
520 strcpy(oname,ip);
521 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
522 z=true;
523 new_user=true;
524 } else {
525 if(strcmp(ouser,user) != 0) {
526 strcpy(ouser,user);
527 new_user=true;
528 }
529 if(strcmp(oip,ip) != 0) {
530 strcpy(oip,ip);
531 strcpy(oname,ip);
532 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
533 new_user=true;
534 }
535 }
536
537 if(SquidGuardReportLimit) {
538 if(strcmp(ouser2,uinfo->label) == 0) {
539 count++;
540 } else {
541 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
542 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
543 count=1;
544 strcpy(ouser2,uinfo->label);
545 }
546 if(count > SquidGuardReportLimit)
547 continue;
548 }
549
550 if (new_user)
551 fprintf(fp_ou,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo->label,ip);
552 else
553 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou);
554 fprintf(fp_ou,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data,hora);
555 output_html_link(fp_ou,url,100);
556 fprintf(fp_ou,"</td><td class=\"data2\">%s</td></tr>\n",rule);
557 }
558 fclose(fp_in);
559 longline_destroy(&line);
560
561 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
562 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
563
564 fputs("</table>\n",fp_ou);
565
566 if (RedirectorErrors>0)
567 {
568 fputs("<div class=\"warn\"><span>",fp_ou);
569 fprintf(fp_ou,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors),RedirectorErrors);
570 fputs("</span></div>\n",fp_ou);
571 }
572
573 fputs("</div>\n",fp_ou);
574 if (write_html_trailer(fp_ou)<0)
575 debuga(_("Write error in file %s\n"),report);
576 if (fclose(fp_ou)==EOF)
577 debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));
578
579 if (unlink(squidguard_in)) {
580 debuga(_("Cannot delete %s - %s\n"),squidguard_in,strerror(errno));
581 exit(EXIT_FAILURE);
582 }
583
584 return;
585 }