]> git.ipfire.org Git - thirdparty/sarg.git/blob - redirector.c
Merge messages about IP addresses.
[thirdparty/sarg.git] / redirector.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2015
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 static char **files_done = NULL; //!< The names of the redirector log files already read by read_log(). It is used to read each file only once.
31 static int nfiles_done = 0; //!< The number of names stored in ::files_done.
32
33 //! The number of redirector log lines rejected by parse_log() because they could not be parsed.
34 static int RedirectorErrors=0;
35 //! The name of the file containing the sorted entries. It is set by redirector_log() and read by redirector_report().
36 static char redirector_sorted[MAXLEN]="";
37
38 static void parse_log(FILE *fp_ou,char *buf)
39 {
40 char leks[5], sep[2], res[MAXLEN];
41 char hour[15];
42 char source[128], list[128];
43 char full_url[MAX_URL_LEN];
44 const char *url;
45 char user[MAX_USER_LEN];
46 char ip[45];
47 char userlabel[MAX_USER_LEN];
48 long long int lmon, lday, lyear;
49 int mon, day, year;
50 int idata=0;
51 bool id_is_ip;
52 struct getwordstruct gwarea;
53 struct getwordstruct gwarea1;
54 struct userinfostruct *uinfo;
55
56 getword_start(&gwarea,buf);
57 if(RedirectorLogFormat[0] != '\0') {
58 getword_start(&gwarea1,RedirectorLogFormat);
59 leks[0]='\0';
60 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
61 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters before first tag)\n"));
62 exit(EXIT_FAILURE);
63 }
64 year=0;
65 mon=0;
66 day=0;
67 hour[0]='\0';
68 source[0]='\0';
69 list[0]='\0';
70 ip[0]='\0';
71 user[0]='\0';
72 full_url[0]='\0';
73 while(strcmp(leks,"end") != 0) {
74 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
75 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (missing # at end of tag)\n"));
76 exit(EXIT_FAILURE);
77 }
78 if (getword(sep,sizeof(sep),&gwarea1,'#')<0) {
79 debuga(_("Invalid \"redirector_log_format\" option in your sarg.conf (too many characters in column separator)\n"));
80 exit(EXIT_FAILURE);
81 }
82 if(strcmp(leks,"end") != 0) {
83 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
84 debuga(_("Parsing of tag \"%s\" in redirector log %s returned no result\n"),leks,wentp);
85 RedirectorErrors++;
86 return;
87 }
88 if(strcmp(leks,"year") == 0) {
89 year=atoi(res);
90 } else if(strcmp(leks,"mon") == 0) {
91 mon=atoi(res);
92 } else if(strcmp(leks,"day") == 0) {
93 day=atoi(res);
94 } else if(strcmp(leks,"hour") == 0) {
95 if (strlen(res)>=sizeof(hour)) {
96 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
97 RedirectorErrors++;
98 return;
99 }
100 strcpy(hour,res);
101 } else if(strcmp(leks,"source") == 0) {
102 if (strlen(res)>=sizeof(source)) {
103 debuga(_("Banning source name too long in redirector log file %s\n"),wentp);
104 RedirectorErrors++;
105 return;
106 }
107 strcpy(source,res);
108 } else if(strcmp(leks,"list") == 0) {
109 if (strlen(res)>=sizeof(list)) {
110 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
111 RedirectorErrors++;
112 return;
113 }
114 strcpy(list,res);
115 } else if(strcmp(leks,"ip") == 0) {
116 if (strlen(res)>=sizeof(ip)) {
117 debuga(_("IP address too long in redirector log file %s\n"),wentp);
118 RedirectorErrors++;
119 return;
120 }
121 strcpy(ip,res);
122 } else if(strcmp(leks,"user") == 0) {
123 if (strlen(res)>=sizeof(user)) {
124 debuga(_("User ID too long in redirector log file %s\n"),wentp);
125 RedirectorErrors++;
126 return;
127 }
128 strcpy(user,res);
129 } else if(strcmp(leks,"url") == 0) {
130 /*
131 * Don't worry about the url being truncated as we only keep the host name
132 * any way...
133 */
134 safe_strcpy(full_url,res,sizeof(full_url));
135 }
136 }
137 }
138 } else {
139 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
140 getword_atoll(&lday,&gwarea,' ')<0) {
141 debuga(_("Invalid date in file \"%s\"\n"),wentp);
142 RedirectorErrors++;
143 return;
144 }
145 year=(int)lyear;
146 mon=(int)lmon;
147 day=(int)lday;
148 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
149 debuga(_("Invalid time in file \"%s\"\n"),wentp);
150 RedirectorErrors++;
151 return;
152 }
153 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
154 debuga(_("Invalid redirected source in file \"%s\"\n"),wentp);
155 RedirectorErrors++;
156 return;
157 }
158 if (getword(list,sizeof(list),&gwarea,'/')<0) {
159 debuga(_("Invalid redirected list in file \"%s\"\n"),wentp);
160 RedirectorErrors++;
161 return;
162 }
163 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(full_url,sizeof(full_url),&gwarea,' ')<0) {
164 debuga(_("Invalid url in file \"%s\"\n"),wentp);
165 RedirectorErrors++;
166 return;
167 }
168 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
169 debuga(_("Invalid source IP in file \"%s\"\n"),wentp);
170 RedirectorErrors++;
171 return;
172 }
173 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
174 debuga(_("Invalid user in file \"%s\"\n"),wentp);
175 RedirectorErrors++;
176 return;
177 }
178 }
179 url=process_url(full_url,false);
180
181 //sprintf(warea,"%04d%02d%02d",year,mon,day);
182
183 if(RedirectorFilterOutDate) {
184 idata = year*10000+mon*100+day;
185 if(idata < dfrom || idata > duntil)
186 return;
187 }
188
189 if(UserIp) {
190 strcpy(user,ip);
191 id_is_ip=true;
192 } else {
193 id_is_ip=false;
194 if (user[0]=='\0' || (user[1]=='\0' && (user[0]=='-' || user[0]==' '))) {
195 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
196 strcpy(user,ip);
197 id_is_ip=true;
198 }
199 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
200 return;
201 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
202 strcpy(user,"everybody");
203 }
204 }
205 uinfo=userinfo_find_from_id(user);
206 if (!uinfo) {
207 uinfo=userinfo_create(user,(id_is_ip) ? NULL : ip);
208 uinfo->no_report=true;
209 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
210 user_find(userlabel,MAX_USER_LEN, user);
211 userinfo_label(uinfo,userlabel);
212 }
213 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo->id,year,mon,day,hour,ip,url);
214 if (source[0] && list[0])
215 fprintf(fp_ou,"%s/%s\n",source,list);
216 else if (source[0])
217 fprintf(fp_ou,"%s\n",source);
218 else
219 fprintf(fp_ou,"%s\n",list);
220 redirector_count++;
221 }
222
223 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
224 {
225 FILE *fp_in = NULL;
226 char *buf;
227 int i;
228 longline line;
229
230 if(debug) {
231 debuga(_("Reading redirector log file %s\n"),wentp);
232 }
233
234 /* With squidGuard, you can log groups in only one log file.
235 We must parse each log files only one time. Example :
236 dest porn {
237 domainlist porn/domains
238 urllist porn/urls
239 log file1.log
240 }
241 dest aggressive {
242 domainlist aggressive/domains
243 urllist aggressive/urls
244 log file2.log
245 }
246 dest audio-video {
247 domainlist audio-video/domains
248 urllist audio-video/urls
249 log file1.log
250 }
251 */
252 for (i=0; i<nfiles_done; i++)
253 if (!strcmp(wentp, files_done[i])) return;
254
255 nfiles_done++;
256 files_done = realloc(files_done, nfiles_done*sizeof(char *));
257 if (!files_done) {
258 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
259 exit(EXIT_FAILURE);
260 }
261 files_done[nfiles_done-1] = strdup(wentp);
262 if (!files_done[nfiles_done-1]) {
263 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
264 exit(EXIT_FAILURE);
265 }
266
267 if ((fp_in=fopen(wentp,"r"))==NULL) {
268 debuga(_("Cannot open file \"%s\": %s\n"),wentp,strerror(errno));
269 exit(EXIT_FAILURE);
270 }
271
272 if ((line=longline_create())==NULL) {
273 debuga(_("Not enough memory to read the redirector log\n"));
274 exit(EXIT_FAILURE);
275 }
276
277 while ((buf=longline_read(fp_in,line)) != NULL) {
278 parse_log(fp_ou,buf);
279 }
280 if (fclose(fp_in)==EOF) {
281 debuga(_("Read error in \"%s\": %s\n"),wentp,strerror(errno));
282 exit(EXIT_FAILURE);
283 }
284 longline_destroy(&line);
285 return;
286 }
287
288
289 void redirector_log(void)
290 {
291 FILE *fp_ou = NULL, *fp_guard = NULL;
292 char buf[MAXLEN];
293 char guard_in[MAXLEN];
294 char logdir[MAXLEN];
295 char user[MAXLEN];
296 char tmp6[MAXLEN];
297 int i;
298 int y;
299 int cstatus;
300 int dfrom, duntil;
301 char *str;
302 char *str2;
303
304 str2 = user;
305
306 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0) {
307 if (debugz>=LogLevel_Process) debugaz(_("No redirector logs provided to produce that kind of report\n"));
308 return;
309 }
310
311 snprintf(guard_in,sizeof(guard_in),"%s/redirector.int_unsort",tmp);
312 if((fp_ou=fopen(guard_in,"w"))==NULL) {
313 debuga(_("Cannot open file \"%s\": %s\n"),guard_in,strerror(errno));
314 exit(EXIT_FAILURE);
315 }
316
317 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
318 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
319
320 if (NRedirectorLogs>0) {
321 for (i=0 ; i<NRedirectorLogs ; i++)
322 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
323 } else {
324 if(access(SquidGuardConf, R_OK) != 0) {
325 debuga(_("Cannot open file \"%s\": %s\n"),SquidGuardConf,strerror(errno));
326 exit(EXIT_FAILURE);
327 }
328
329 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
330 debuga(_("Cannot open file \"%s\": %s\n"),SquidGuardConf,strerror(errno));
331 exit(EXIT_FAILURE);
332 }
333
334 logdir[0]=0;
335 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
336 fixendofline(buf);
337 if((str=get_param_value("logdir",buf))!=NULL) {
338 /*
339 We want to tolerate spaces inside the directory name but we must also
340 remove the trailing spaces left by the editor after the directory name.
341 This should not be a problem as nobody use a file name with trailing spaces.
342 */
343 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
344 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
345 logdir[y+1] = '\0';
346 while (y>=0) {
347 logdir[y] = str[y];
348 y--;
349 }
350 } else if((str=get_param_value("log",buf))!=NULL) {
351 if((str2=get_param_value("anonymous",str))!=NULL)
352 str=str2;
353
354 /*
355 If logdir is defined, we prepend it to the log file name, otherwise, we assume
356 the log directive provides an absolute file name to the log file. Therefore,
357 we don't need to add an additionnal / at the beginning of the log file name.
358 */
359 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
360 /*
361 Spaces are allowed in the name of the log file. The file name ends at the first #
362 because it is assumed it is an end of line comment. Any space before the # is then
363 removed. Any control character (i.e. a character with a code lower than 32) ends
364 the file name. That includes the terminating zero.
365 */
366 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
367 wentp[y++]=*str++;
368 if(*str=='#') {
369 str--;
370 while(*str==' ' && y>0) {
371 str--;
372 y--;
373 }
374 }
375 wentp[y]=0;
376 read_log(wentp,fp_ou,dfrom,duntil);
377 }
378 }
379 if (fclose(fp_guard)==EOF) {
380 debuga(_("Read error in \"%s\": %s\n"),SquidGuardConf,strerror(errno));
381 exit(EXIT_FAILURE);
382 }
383 }
384
385 if (fp_ou && fclose(fp_ou)==EOF) {
386 debuga(_("Write error in \"%s\": %s\n"),guard_in,strerror(errno));
387 exit(EXIT_FAILURE);
388 }
389
390 if (files_done) {
391 for (y=0; y<nfiles_done; y++)
392 if (files_done[y]) free(files_done[y]);
393 free(files_done);
394 }
395
396 if (redirector_count) {
397 snprintf(redirector_sorted,sizeof(redirector_sorted),"%s/redirector.int_log",tmp);
398 if(debug) {
399 debuga(_("Sorting file: %s\n"),redirector_sorted);
400 }
401
402 if (snprintf(tmp6,sizeof(tmp6),"sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, redirector_sorted)>=sizeof(tmp6)) {
403 debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),guard_in,redirector_sorted);
404 exit(EXIT_FAILURE);
405 }
406 cstatus=system(tmp6);
407 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
408 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
409 debuga(_("sort command: %s\n"),tmp6);
410 exit(EXIT_FAILURE);
411 }
412 }
413
414 if (!KeepTempLog && unlink(guard_in)) {
415 debuga(_("Cannot delete \"%s\": %s\n"),guard_in,strerror(errno));
416 exit(EXIT_FAILURE);
417 }
418 return;
419 }
420
/*!
 * Write a table row telling how many redirector entries of the current user
 * were left out of the report.
 *
 * The message is written straight to the output file. It is not prepared in a
 * fixed size buffer because a long translation would be truncated, possibly in
 * the middle of a multi-byte character or of the HTML entity.
 *
 * \param fp_ou The HTML report file.
 * \param count The number of entries not shown.
 */
static void show_ignored_redirector(FILE *fp_ou,int count)
{
	fputs("<tr><td class=\"data\"></td><td class=\"data\"></td><td class=\"data\"></td><td class=\"data2 more\">",fp_ou);
	fprintf(fp_ou,ngettext("%d more redirector entry not shown here&hellip;","%d more redirector entries not shown here&hellip;",count),count);
	fputs("</td><td class=\"data\"></td></tr>\n",fp_ou);
}
428
429 void redirector_report(void)
430 {
431 FILE *fp_in = NULL, *fp_ou = NULL;
432
433 char *buf;
434 char *url;
435 char report[MAXLEN];
436 char ip[45];
437 char rule[255];
438 char oip[45];
439 char user[MAXLEN];
440 char ouser[MAXLEN];
441 char data[15];
442 char hora[15];
443 char ouser2[255];
444 char oname[MAXLEN];
445 bool z=false;
446 int count=0;
447 long long int data2;
448 bool new_user;
449 struct getwordstruct gwarea;
450 const struct userinfostruct *uinfo;
451 struct tm t;
452 longline line;
453
454 ouser[0]='\0';
455 ouser2[0]='\0';
456
457 if(!redirector_count) {
458 if (debugz>=LogLevel_Process) {
459 if (redirector_sorted[0])
460 debugaz(_("Redirector report not generated because it is empty\n"));
461 }
462 return;
463 }
464
465 snprintf(report,sizeof(report),"%s/redirector.html",outdirname);
466
467 if((fp_in=fopen(redirector_sorted,"r"))==NULL) {
468 debuga(_("Cannot open file \"%s\": %s\n"),redirector_sorted,strerror(errno));
469 exit(EXIT_FAILURE);
470 }
471
472 if((fp_ou=fopen(report,"w"))==NULL) {
473 debuga(_("Cannot open file \"%s\": %s\n"),report,strerror(errno));
474 exit(EXIT_FAILURE);
475 }
476
477 if ((line=longline_create())==NULL) {
478 debuga(_("Not enough memory to read the processed redirector log\n"));
479 exit(EXIT_FAILURE);
480 }
481
482 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Redirector report"),HTML_JS_NONE);
483 fputs("<tr><td class=\"header_c\">",fp_ou);
484 fprintf(fp_ou,_("Period: %s"),period.html);
485 fputs("</td></tr>\n",fp_ou);
486 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Redirector report"));
487 close_html_header(fp_ou);
488
489 fputs("<div class=\"report\"><table cellpadding=1 cellspacing=2>\n",fp_ou);
490 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"),_("RULE"));
491
492 while((buf=longline_read(fp_in,line))!=NULL) {
493 getword_start(&gwarea,buf);
494 if (getword(user,sizeof(user),&gwarea,'\t')<0) {
495 debuga(_("Invalid user in file \"%s\"\n"),redirector_sorted);
496 exit(EXIT_FAILURE);
497 }
498 if (getword_atoll(&data2,&gwarea,'\t')<0) {
499 debuga(_("Invalid date in file \"%s\"\n"),redirector_sorted);
500 exit(EXIT_FAILURE);
501 }
502 if (getword(hora,sizeof(hora),&gwarea,'\t')<0) {
503 debuga(_("Invalid time in file \"%s\"\n"),redirector_sorted);
504 exit(EXIT_FAILURE);
505 }
506 if (getword(ip,sizeof(ip),&gwarea,'\t')<0) {
507 debuga(_("Invalid IP address in file \"%s\"\n"),redirector_sorted);
508 exit(EXIT_FAILURE);
509 }
510 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
511 debuga(_("Invalid url in file \"%s\"\n"),redirector_sorted);
512 exit(EXIT_FAILURE);
513 }
514 if (getword(rule,sizeof(rule),&gwarea,'\n')<0) {
515 debuga(_("Invalid rule in file \"%s\"\n"),redirector_sorted);
516 exit(EXIT_FAILURE);
517 }
518
519 uinfo=userinfo_find_from_id(user);
520 if (!uinfo) {
521 debuga(_("Unknown user ID %s in file \"%s\"\n"),user,redirector_sorted);
522 exit(EXIT_FAILURE);
523 }
524
525 computedate(data2/10000,(data2/100)%10,data2%100,&t);
526 strftime(data,sizeof(data),"%x",&t);
527
528 new_user=false;
529 if(!z) {
530 strcpy(ouser,user);
531 strcpy(oip,ip);
532 strcpy(oname,ip);
533 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
534 z=true;
535 new_user=true;
536 } else {
537 if(strcmp(ouser,user) != 0) {
538 strcpy(ouser,user);
539 new_user=true;
540 }
541 if(strcmp(oip,ip) != 0) {
542 strcpy(oip,ip);
543 strcpy(oname,ip);
544 if (Ip2Name && !uinfo->id_is_ip) ip2name(oname,sizeof(oname));
545 new_user=true;
546 }
547 }
548
549 if(SquidGuardReportLimit) {
550 if(strcmp(ouser2,uinfo->label) == 0) {
551 count++;
552 } else {
553 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
554 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
555 count=1;
556 strcpy(ouser2,uinfo->label);
557 }
558 if(count > SquidGuardReportLimit)
559 continue;
560 }
561
562 if (new_user)
563 fprintf(fp_ou,"<tr><td class=\"data2\">%s</td><td class=\"data2\">%s</td>",uinfo->label,ip);
564 else
565 fputs("<tr><td class=\"data2\"></td><td class=\"data2\"></td>",fp_ou);
566 fprintf(fp_ou,"<td class=\"data2\">%s-%s</td><td class=\"data2\">",data,hora);
567 output_html_link(fp_ou,url,100);
568 fprintf(fp_ou,"</td><td class=\"data2\">%s</td></tr>\n",rule);
569 }
570 if (fclose(fp_in)==EOF) {
571 debuga(_("Read error in \"%s\": %s\n"),redirector_sorted,strerror(errno));
572 exit(EXIT_FAILURE);
573 }
574 longline_destroy(&line);
575
576 if(count>SquidGuardReportLimit && SquidGuardReportLimit>0)
577 show_ignored_redirector(fp_ou,count-SquidGuardReportLimit);
578
579 fputs("</table>\n",fp_ou);
580
581 if (RedirectorErrors>0)
582 {
583 fputs("<div class=\"warn\"><span>",fp_ou);
584 fprintf(fp_ou,ngettext("%d error found in the log file. Some entries may be missing.","%d errors found in the log file. Some entries may be missing.",RedirectorErrors),RedirectorErrors);
585 fputs("</span></div>\n",fp_ou);
586 }
587
588 fputs("</div>\n",fp_ou);
589 write_html_trailer(fp_ou);
590 if (fclose(fp_ou)==EOF) {
591 debuga(_("Write error in \"%s\": %s\n"),report,strerror(errno));
592 exit(EXIT_FAILURE);
593 }
594
595 if (!KeepTempLog && unlink(redirector_sorted)) {
596 debuga(_("Cannot delete \"%s\": %s\n"),redirector_sorted,strerror(errno));
597 exit(EXIT_FAILURE);
598 }
599
600 return;
601 }