/*
 * Source page: git.ipfire.org Git - thirdparty/sarg.git/blob - redirector.c
 *
 * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
 *      please look at http://sarg.sourceforge.net/donations.php
 *      http://sourceforge.net/projects/sarg/forums/forum/363374
 * ---------------------------------------------------------------------
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 */
27 #include "include/conf.h"
28 #include "include/defs.h"
30 static char ** files_done
= NULL
;
31 static int nfiles_done
= 0 ;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
= 0 ;
35 //! The file containing the sorted entries.
36 static char redirector_sorted
[ MAXLEN
]= "" ;
38 extern char StripUserSuffix
[ MAX_USER_LEN
];
39 extern int StripSuffixLen
;
41 static void parse_log ( FILE * fp_ou
, char * buf
, int dfrom
, int duntil
)
43 char leks
[ 5 ], sep
[ 2 ], res
[ MAXLEN
];
45 char source
[ 128 ], list
[ 128 ];
46 char full_url
[ MAX_URL_LEN
];
48 char UserBuf
[ MAX_USER_LEN
];
51 char userlabel
[ MAX_USER_LEN
];
52 char IpBuf
[ MAX_USER_LEN
];
53 long long int lmon
, lday
, lyear
;
57 struct getwordstruct gwarea
;
58 struct getwordstruct gwarea1
;
59 struct userinfostruct
* uinfo
;
60 enum UserProcessError PUser
;
62 getword_start (& gwarea
, buf
);
63 if ( RedirectorLogFormat
[ 0 ] != '\0' ) {
64 getword_start (& gwarea1
, RedirectorLogFormat
);
66 if ( getword ( leks
, sizeof ( leks
),& gwarea1
, '#' )< 0 ) {
67 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (too many characters before first tag) \n " ));
79 while ( strcmp ( leks
, "end" ) != 0 ) {
80 if ( getword ( leks
, sizeof ( leks
),& gwarea1
, '#' )< 0 ) {
81 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (missing # at end of tag) \n " ));
84 if ( getword ( sep
, sizeof ( sep
),& gwarea1
, '#' )< 0 ) {
85 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (too many characters in column separator) \n " ));
88 if ( strcmp ( leks
, "end" ) != 0 ) {
89 if ( getword_limit ( res
, sizeof ( res
),& gwarea
, sep
[ 0 ])< 0 ) {
90 debuga ( __FILE__
, __LINE__
, _ ( "Parsing of tag \" %s \" in redirector log \" %s \" returned no result \n " ), leks
, wentp
);
94 if ( strcmp ( leks
, "year" ) == 0 ) {
96 } else if ( strcmp ( leks
, "mon" ) == 0 ) {
98 } else if ( strcmp ( leks
, "day" ) == 0 ) {
100 } else if ( strcmp ( leks
, "hour" ) == 0 ) {
101 if ( strlen ( res
)>= sizeof ( hour
)) {
102 debuga ( __FILE__
, __LINE__
, _ ( "Hour string too long in redirector log file \" %s \"\n " ), wentp
);
107 } else if ( strcmp ( leks
, "source" ) == 0 ) {
108 if ( strlen ( res
)>= sizeof ( source
)) {
109 debuga ( __FILE__
, __LINE__
, _ ( "Banning source name too long in redirector log file \" %s \"\n " ), wentp
);
114 } else if ( strcmp ( leks
, "list" ) == 0 ) {
115 if ( strlen ( res
)>= sizeof ( list
)) {
116 debuga ( __FILE__
, __LINE__
, _ ( "Banning list name too long in redirector log file \" %s \"\n " ), wentp
);
121 } else if ( strcmp ( leks
, "ip" ) == 0 ) {
122 if ( strlen ( res
)>= sizeof ( ip
)) {
123 debuga ( __FILE__
, __LINE__
, _ ( "IP address too long in redirector log file \" %s \"\n " ), wentp
);
128 } else if ( strcmp ( leks
, "user" ) == 0 ) {
129 if ( strlen ( res
)>= sizeof ( UserBuf
)) {
130 debuga ( __FILE__
, __LINE__
, _ ( "User ID too long in redirector log file \" %s \"\n " ), wentp
);
135 } else if ( strcmp ( leks
, "url" ) == 0 ) {
137 * Don't worry about the url being truncated as we only keep the host name
140 safe_strcpy ( full_url
, res
, sizeof ( full_url
));
145 if ( getword_atoll (& lyear
,& gwarea
, '-' )< 0 || getword_atoll (& lmon
,& gwarea
, '-' )< 0 ||
146 getword_atoll (& lday
,& gwarea
, ' ' )< 0 ) {
147 debuga ( __FILE__
, __LINE__
, _ ( "Invalid date in file \" %s \"\n " ), wentp
);
154 if ( getword ( hour
, sizeof ( hour
),& gwarea
, ' ' )< 0 ) {
155 debuga ( __FILE__
, __LINE__
, _ ( "Invalid time in file \" %s \"\n " ), wentp
);
159 if ( getword_skip ( MAXLEN
,& gwarea
, '(' )< 0 || getword ( source
, sizeof ( source
),& gwarea
, '/' )< 0 ) {
160 debuga ( __FILE__
, __LINE__
, _ ( "Invalid redirected source in file \" %s \"\n " ), wentp
);
164 if ( getword ( list
, sizeof ( list
),& gwarea
, '/' )< 0 ) {
165 debuga ( __FILE__
, __LINE__
, _ ( "Invalid redirected list in file \" %s \"\n " ), wentp
);
169 if ( getword_skip ( MAXLEN
,& gwarea
, ' ' )< 0 || getword_limit ( full_url
, sizeof ( full_url
),& gwarea
, ' ' )< 0 ) {
170 debuga ( __FILE__
, __LINE__
, _ ( "Invalid url in file \" %s \"\n " ), wentp
);
174 if ( getword ( ip
, sizeof ( ip
),& gwarea
, '/' )< 0 ) {
175 debuga ( __FILE__
, __LINE__
, _ ( "Invalid source IP in file \" %s \"\n " ), wentp
);
179 if ( getword_skip ( MAXLEN
,& gwarea
, ' ' )< 0 || getword ( UserBuf
, sizeof ( UserBuf
),& gwarea
, ' ' )< 0 ) {
180 debuga ( __FILE__
, __LINE__
, _ ( "Invalid user in file \" %s \"\n " ), wentp
);
185 url
= process_url ( full_url
, false );
187 //sprintf(warea,"%04d%02d%02d",year,mon,day);
189 if ( RedirectorFilterOutDate
) {
190 idata
= year
* 10000 + mon
* 100 + day
;
191 if ( idata
< dfrom
|| idata
> duntil
)
196 PUser
= process_user (& user
, ip
,& id_is_ip
);
197 if ( PUser
!= USERERR_NoError
) return ;
199 uinfo
= userinfo_find_from_id ( user
);
201 uinfo
= userinfo_create ( user
,( id_is_ip
) ? NULL
: ip
);
202 uinfo
-> no_report
= true ;
203 if ( Ip2Name
&& id_is_ip
) {
205 ip2name ( IpBuf
, sizeof ( IpBuf
));
208 user_find ( userlabel
, MAX_USER_LEN
, user
);
209 userinfo_label ( uinfo
, userlabel
);
211 fprintf ( fp_ou
, "%s \t %04d%02d%02d \t %s \t %s \t %s \t " , uinfo
-> id
, year
, mon
, day
, hour
, ip
, url
);
212 if ( source
[ 0 ] && list
[ 0 ])
213 fprintf ( fp_ou
, "%s/%s \n " , source
, list
);
215 fprintf ( fp_ou
, "%s \n " , source
);
217 fprintf ( fp_ou
, "%s \n " , list
);
221 static void read_log ( const char * wentp
, FILE * fp_ou
, int dfrom
, int duntil
)
223 FileObject
* fp_in
= NULL
;
229 debuga ( __FILE__
, __LINE__
, _ ( "Reading redirector log file \" %s \"\n " ), wentp
);
232 /* With squidGuard, you can log groups in only one log file.
233 We must parse each log files only one time. Example :
235 domainlist porn/domains
240 domainlist aggressive/domains
241 urllist aggressive/urls
245 domainlist audio-video/domains
246 urllist audio-video/urls
250 for ( i
= 0 ; i
< nfiles_done
; i
++)
251 if (! strcmp ( wentp
, files_done
[ i
])) return ;
254 files_done
= realloc ( files_done
, nfiles_done
* sizeof ( char *));
256 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to store the name of the new redirector log to be read - %s \n " ), strerror ( errno
));
259 files_done
[ nfiles_done
- 1 ] = strdup ( wentp
);
260 if (! files_done
[ nfiles_done
- 1 ]) {
261 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to store the name of the new redirector log to be read - %s \n " ), strerror ( errno
));
265 if (( fp_in
= FileObject_Open ( wentp
))== NULL
) {
266 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), wentp
, FileObject_GetLastOpenError ());
270 if (( line
= longline_create ())== NULL
) {
271 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to read file \" %s \"\n " ), wentp
);
275 while (( buf
= longline_read ( fp_in
, line
)) != NULL
) {
276 parse_log ( fp_ou
, buf
, dfrom
, duntil
);
278 if ( FileObject_Close ( fp_in
)) {
279 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), wentp
, FileObject_GetLastCloseError ());
282 longline_destroy (& line
);
287 void redirector_log ( void )
289 FILE * fp_ou
= NULL
, * fp_guard
= NULL
;
291 char guard_in
[ MAXLEN
];
304 if ( SquidGuardConf
[ 0 ] == '\0' && NRedirectorLogs
== 0 ) {
305 if ( debugz
>= LogLevel_Process
) debugaz ( __FILE__
, __LINE__
, _ ( "No redirector logs provided to produce that kind of report \n " ));
309 snprintf ( guard_in
, sizeof ( guard_in
), "%s/redirector.int_unsort" , tmp
);
310 if (( fp_ou
= fopen ( guard_in
, "w" ))== NULL
) {
311 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
315 dfrom
=( period
. start
. tm_year
+ 1900 )* 10000 +( period
. start
. tm_mon
+ 1 )* 100 + period
. start
. tm_mday
;
316 duntil
=( period
. end
. tm_year
+ 1900 )* 10000 +( period
. end
. tm_mon
+ 1 )* 100 + period
. end
. tm_mday
;
318 if ( NRedirectorLogs
> 0 ) {
319 for ( i
= 0 ; i
< NRedirectorLogs
; i
++)
320 read_log ( RedirectorLogs
[ i
], fp_ou
, dfrom
, duntil
);
322 if ( access ( SquidGuardConf
, R_OK
) != 0 ) {
323 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
327 if (( fp_guard
= fopen ( SquidGuardConf
, "r" ))== NULL
) {
328 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
333 while ( fgets ( buf
, sizeof ( buf
), fp_guard
)!= NULL
) {
335 if (( str
= get_param_value ( "logdir" , buf
))!= NULL
) {
337 We want to tolerate spaces inside the directory name but we must also
338 remove the trailing spaces left by the editor after the directory name.
339 This should not be a problem as nobody use a file name with trailing spaces.
341 for ( y
= strlen ( str
)- 1 ; y
>= 0 && ( unsigned char ) str
[ y
]<= ' ' ; y
--);
342 if ( y
>= sizeof ( logdir
)- 1 ) y
= sizeof ( logdir
)- 2 ;
348 } else if (( str
= get_param_value ( "log" , buf
))!= NULL
) {
349 if (( str2
= get_param_value ( "anonymous" , str
))!= NULL
)
353 If logdir is defined, we prepend it to the log file name, otherwise, we assume
354 the log directive provides an absolute file name to the log file. Therefore,
355 we don't need to add an additionnal / at the beginning of the log file name.
357 y
=( logdir
[ 0 ]) ? sprintf ( wentp
, "%s/" , logdir
) : 0 ;
359 Spaces are allowed in the name of the log file. The file name ends at the first #
360 because it is assumed it is an end of line comment. Any space before the # is then
361 removed. Any control character (i.e. a character with a code lower than 32) ends
362 the file name. That includes the terminating zero.
364 while (( unsigned char )* str
>= ' ' && * str
!= '#' && y
< sizeof ( wentp
)- 1 )
368 while (* str
== ' ' && y
> 0 ) {
374 read_log ( wentp
, fp_ou
, dfrom
, duntil
);
377 if ( fclose ( fp_guard
)== EOF
) {
378 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
383 if ( fp_ou
&& fclose ( fp_ou
)== EOF
) {
384 debuga ( __FILE__
, __LINE__
, _ ( "Write error in \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
389 for ( y
= 0 ; y
< nfiles_done
; y
++)
390 if ( files_done
[ y
]) free ( files_done
[ y
]);
394 if ( redirector_count
) {
395 snprintf ( redirector_sorted
, sizeof ( redirector_sorted
), "%s/redirector.int_log" , tmp
);
397 debuga ( __FILE__
, __LINE__
, _ ( "Sorting file \" %s \"\n " ), redirector_sorted
);
400 if ( snprintf ( tmp6
, sizeof ( tmp6
), "sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \" %s \" -o \" %s \" " , guard_in
, redirector_sorted
)>= sizeof ( tmp6
)) {
401 debuga ( __FILE__
, __LINE__
, _ ( "Sort command too long when sorting file \" %s \" to \" %s \"\n " ), guard_in
, redirector_sorted
);
404 cstatus
= system ( tmp6
);
405 if (! WIFEXITED ( cstatus
) || WEXITSTATUS ( cstatus
)) {
406 debuga ( __FILE__
, __LINE__
, _ ( "sort command return status %d \n " ), WEXITSTATUS ( cstatus
));
407 debuga ( __FILE__
, __LINE__
, _ ( "sort command: %s \n " ), tmp6
);
412 if (! KeepTempLog
&& unlink ( guard_in
)) {
413 debuga ( __FILE__
, __LINE__
, _ ( "Cannot delete \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
419 static void show_ignored_redirector ( FILE * fp_ou
, int count
)
423 snprintf ( ignored
, sizeof ( ignored
), ngettext ( "%d more redirector entry not shown here…" , "%d more redirector entries not shown here…" , count
), count
);
424 fprintf ( fp_ou
, "<tr><td class= \" data \" ></td><td class= \" data \" ></td><td class= \" data \" ></td><td class= \" data2 more \" >%s</td><td class= \" data \" ></td></tr> \n " , ignored
);
427 void redirector_report ( void )
429 FileObject
* fp_in
= NULL
;
448 struct getwordstruct gwarea
;
449 const struct userinfostruct
* uinfo
;
456 if (! redirector_count
) {
457 if ( debugz
>= LogLevel_Process
) {
458 if ( redirector_sorted
[ 0 ])
459 debugaz ( __FILE__
, __LINE__
, _ ( "Redirector report not generated because it is empty \n " ));
464 snprintf ( report
, sizeof ( report
), "%s/redirector.html" , outdirname
);
466 if (( fp_in
= FileObject_Open ( redirector_sorted
))== NULL
) {
467 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), redirector_sorted
, FileObject_GetLastOpenError ());
471 if (( fp_ou
= fopen ( report
, "w" ))== NULL
) {
472 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), report
, strerror ( errno
));
476 if (( line
= longline_create ())== NULL
) {
477 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to read file \" %s \"\n " ), redirector_sorted
);
481 write_html_header ( fp_ou
,( IndexTree
== INDEX_TREE_DATE
) ? 3 : 1 , _ ( "Redirector report" ), HTML_JS_NONE
);
482 fputs ( "<tr><td class= \" header_c \" >" , fp_ou
);
483 fprintf ( fp_ou
, _ ( "Period: %s" ), period
. html
);
484 fputs ( "</td></tr> \n " , fp_ou
);
485 fprintf ( fp_ou
, "<tr><th class= \" header_c \" >%s</th></tr> \n " , _ ( "Redirector report" ));
486 close_html_header ( fp_ou
);
488 fputs ( "<div class= \" report \" ><table cellpadding=1 cellspacing=2> \n " , fp_ou
);
489 fprintf ( fp_ou
, "<tr><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th></tr> \n " , _ ( "USERID" ), _ ( "IP/NAME" ), _ ( "DATE/TIME" ), _ ( "ACCESSED SITE" ), _ ( "RULE" ));
491 while (( buf
= longline_read ( fp_in
, line
))!= NULL
) {
492 getword_start (& gwarea
, buf
);
493 if ( getword ( user
, sizeof ( user
),& gwarea
, ' \t ' )< 0 ) {
494 debuga ( __FILE__
, __LINE__
, _ ( "Invalid user in file \" %s \"\n " ), redirector_sorted
);
497 if ( getword_atoll (& data2
,& gwarea
, ' \t ' )< 0 ) {
498 debuga ( __FILE__
, __LINE__
, _ ( "Invalid date in file \" %s \"\n " ), redirector_sorted
);
501 if ( getword ( hora
, sizeof ( hora
),& gwarea
, ' \t ' )< 0 ) {
502 debuga ( __FILE__
, __LINE__
, _ ( "Invalid time in file \" %s \"\n " ), redirector_sorted
);
505 if ( getword ( ip
, sizeof ( ip
),& gwarea
, ' \t ' )< 0 ) {
506 debuga ( __FILE__
, __LINE__
, _ ( "Invalid IP address in file \" %s \"\n " ), redirector_sorted
);
509 if ( getword_ptr ( buf
,& url
,& gwarea
, ' \t ' )< 0 ) {
510 debuga ( __FILE__
, __LINE__
, _ ( "Invalid url in file \" %s \"\n " ), redirector_sorted
);
513 if ( getword ( rule
, sizeof ( rule
),& gwarea
, ' \n ' )< 0 ) {
514 debuga ( __FILE__
, __LINE__
, _ ( "Invalid rule in file \" %s \"\n " ), redirector_sorted
);
518 uinfo
= userinfo_find_from_id ( user
);
520 debuga ( __FILE__
, __LINE__
, _ ( "Unknown user ID %s in file \" %s \"\n " ), user
, redirector_sorted
);
524 computedate ( data2
/ 10000 ,( data2
/ 100 )% 10 , data2
% 100 ,& t
);
525 strftime ( data
, sizeof ( data
), "%x" ,& t
);
532 if ( Ip2Name
&& ! uinfo
-> id_is_ip
) ip2name ( oname
, sizeof ( oname
));
536 if ( strcmp ( ouser
, user
) != 0 ) {
540 if ( strcmp ( oip
, ip
) != 0 ) {
543 if ( Ip2Name
&& ! uinfo
-> id_is_ip
) ip2name ( oname
, sizeof ( oname
));
548 if ( SquidGuardReportLimit
) {
549 if ( strcmp ( ouser2
, uinfo
-> label
) == 0 ) {
552 if ( count
> SquidGuardReportLimit
&& SquidGuardReportLimit
> 0 )
553 show_ignored_redirector ( fp_ou
, count
- SquidGuardReportLimit
);
555 strcpy ( ouser2
, uinfo
-> label
);
557 if ( count
> SquidGuardReportLimit
)
562 fprintf ( fp_ou
, "<tr><td class= \" data2 \" >%s</td><td class= \" data2 \" >%s</td>" , uinfo
-> label
, ip
);
564 fputs ( "<tr><td class= \" data2 \" ></td><td class= \" data2 \" ></td>" , fp_ou
);
565 fprintf ( fp_ou
, "<td class= \" data2 \" >%s-%s</td><td class= \" data2 \" >" , data
, hora
);
566 output_html_link ( fp_ou
, url
, 100 );
567 fprintf ( fp_ou
, "</td><td class= \" data2 \" >%s</td></tr> \n " , rule
);
569 if ( FileObject_Close ( fp_in
)) {
570 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), redirector_sorted
, FileObject_GetLastCloseError ());
573 longline_destroy (& line
);
575 if ( count
> SquidGuardReportLimit
&& SquidGuardReportLimit
> 0 )
576 show_ignored_redirector ( fp_ou
, count
- SquidGuardReportLimit
);
578 fputs ( "</table> \n " , fp_ou
);
580 if ( RedirectorErrors
> 0 )
582 fputs ( "<div class= \" warn \" ><span>" , fp_ou
);
583 fprintf ( fp_ou
, ngettext ( "%d error found in the log file. Some entries may be missing." , "%d errors found in the log file. Some entries may be missing." , RedirectorErrors
), RedirectorErrors
);
584 fputs ( "</span></div> \n " , fp_ou
);
587 fputs ( "</div> \n " , fp_ou
);
588 write_html_trailer ( fp_ou
);
589 if ( fclose ( fp_ou
)== EOF
) {
590 debuga ( __FILE__
, __LINE__
, _ ( "Write error in \" %s \" : %s \n " ), report
, strerror ( errno
));
594 if (! KeepTempLog
&& unlink ( redirector_sorted
)) {
595 debuga ( __FILE__
, __LINE__
, _ ( "Cannot delete \" %s \" : %s \n " ), redirector_sorted
, strerror ( errno
));