]>
git.ipfire.org Git - thirdparty/sarg.git/blob - redirector.c
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
//! Names of the redirector log files already taken by read_log (array grown with realloc, entries made with strdup).
30 static char ** files_done
= NULL
;
//! Number of entries in files_done.
31 static int nfiles_done
= 0 ;
33 //! The number of invalid lines found in the redirector report.
34 static int RedirectorErrors
= 0 ;
35 //! The file containing the sorted entries.
36 static char redirector_sorted
[ MAXLEN
]= "" ;
// Declared here and defined elsewhere; neither is referenced in the visible part of this file.
38 extern char StripUserSuffix
[ MAX_USER_LEN
];
39 extern int StripSuffixLen
;
//! Parse one line of a redirector log and append the extracted entry to the unsorted temporary file.
//!
//! \param fp_ou The file receiving one tab separated record per accepted line.
//! \param buf The log line to parse.
//! \param dfrom First date of the reporting range, compared as year*10000+mon*100+day.
//! \param duntil Last date of the reporting range, same encoding as dfrom.
//!
//! When RedirectorLogFormat is not empty, the line is split by walking the tags of that format
//! (each tag ends with a # and is followed by a one character column separator ended by another #)
//! until the tag "end" is met. Otherwise the fixed layout parsed further down is used.
//! NOTE(review): several lines of this function (braces, some declarations, the statements following
//! each error message) are missing from this capture, so the error paths are not documented here.
41 static void parse_log ( FILE * fp_ou
, char * buf
, int dfrom
, int duntil
)
43 char leks
[ 5 ], sep
[ 2 ], res
[ MAXLEN
];
45 char source
[ 128 ], list
[ 128 ];
46 char full_url
[ MAX_URL_LEN
];
48 char UserBuf
[ MAX_USER_LEN
];
51 char userlabel
[ MAX_USER_LEN
];
52 char IpBuf
[ MAX_USER_LEN
];
53 long long int lmon
, lday
, lyear
;
57 struct getwordstruct gwarea
;
58 struct getwordstruct gwarea1
;
59 struct userinfostruct
* uinfo
;
60 enum UserProcessError PUser
;
// gwarea walks the log line, gwarea1 walks the configured format string.
62 getword_start (& gwarea
, buf
);
63 if ( RedirectorLogFormat
[ 0 ] != '\0' ) {
64 getword_start (& gwarea1
, RedirectorLogFormat
);
// Skip whatever precedes the first # of the format.
66 if ( getword ( leks
, sizeof ( leks
),& gwarea1
, '#' )< 0 ) {
67 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (too many characters before first tag) \n " ));
79 while ( strcmp ( leks
, "end" ) != 0 ) {
// Read the tag name, then the separator that ends the corresponding column in the log line.
80 if ( getword ( leks
, sizeof ( leks
),& gwarea1
, '#' )< 0 ) {
81 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (missing # at end of tag) \n " ));
84 if ( getword ( sep
, sizeof ( sep
),& gwarea1
, '#' )< 0 ) {
85 debuga ( __FILE__
, __LINE__
, _ ( "Invalid \" redirector_log_format \" option in your sarg.conf (too many characters in column separator) \n " ));
88 if ( strcmp ( leks
, "end" ) != 0 ) {
// Extract the column value from the log line up to the separator.
89 if ( getword_limit ( res
, sizeof ( res
),& gwarea
, sep
[ 0 ])< 0 ) {
90 debuga ( __FILE__
, __LINE__
, _ ( "Parsing of tag \" %s \" in redirector log \" %s \" returned no result \n " ), leks
, wentp
);
// Dispatch on the tag name. The text tags are checked against the size of their destination buffer.
94 if ( strcmp ( leks
, "year" ) == 0 ) {
96 } else if ( strcmp ( leks
, "mon" ) == 0 ) {
98 } else if ( strcmp ( leks
, "day" ) == 0 ) {
100 } else if ( strcmp ( leks
, "hour" ) == 0 ) {
101 if ( strlen ( res
)>= sizeof ( hour
)) {
102 debuga ( __FILE__
, __LINE__
, _ ( "Hour string too long in redirector log file \" %s \"\n " ), wentp
);
107 } else if ( strcmp ( leks
, "source" ) == 0 ) {
108 if ( strlen ( res
)>= sizeof ( source
)) {
109 debuga ( __FILE__
, __LINE__
, _ ( "Banning source name too long in redirector log file \" %s \"\n " ), wentp
);
114 } else if ( strcmp ( leks
, "list" ) == 0 ) {
115 if ( strlen ( res
)>= sizeof ( list
)) {
116 debuga ( __FILE__
, __LINE__
, _ ( "Banning list name too long in redirector log file \" %s \"\n " ), wentp
);
121 } else if ( strcmp ( leks
, "ip" ) == 0 ) {
122 if ( strlen ( res
)>= sizeof ( ip
)) {
123 debuga ( __FILE__
, __LINE__
, _ ( "IP address too long in redirector log file \" %s \"\n " ), wentp
);
128 } else if ( strcmp ( leks
, "user" ) == 0 ) {
129 if ( strlen ( res
)>= sizeof ( UserBuf
)) {
130 debuga ( __FILE__
, __LINE__
, _ ( "User ID too long in redirector log file \" %s \"\n " ), wentp
);
135 } else if ( strcmp ( leks
, "url" ) == 0 ) {
137 * Don't worry about the url being truncated as we only keep the host name
140 safe_strcpy ( full_url
, res
, sizeof ( full_url
));
// Fixed layout: year-mon-day, time, text up to (, source/list/, text up to a space, url, ip/, text up to a space, user.
// NOTE(review): presumably the else branch of the RedirectorLogFormat test; the branch line itself is not in this capture.
145 if ( getword_atoll (& lyear
,& gwarea
, '-' )< 0 || getword_atoll (& lmon
,& gwarea
, '-' )< 0 ||
146 getword_atoll (& lday
,& gwarea
, ' ' )< 0 ) {
147 debuga ( __FILE__
, __LINE__
, _ ( "Invalid date in file \" %s \"\n " ), wentp
);
154 if ( getword ( hour
, sizeof ( hour
),& gwarea
, ' ' )< 0 ) {
155 debuga ( __FILE__
, __LINE__
, _ ( "Invalid time in file \" %s \"\n " ), wentp
);
159 if ( getword_skip ( MAXLEN
,& gwarea
, '(' )< 0 || getword ( source
, sizeof ( source
),& gwarea
, '/' )< 0 ) {
160 debuga ( __FILE__
, __LINE__
, _ ( "Invalid redirected source in file \" %s \"\n " ), wentp
);
164 if ( getword ( list
, sizeof ( list
),& gwarea
, '/' )< 0 ) {
165 debuga ( __FILE__
, __LINE__
, _ ( "Invalid redirected list in file \" %s \"\n " ), wentp
);
169 if ( getword_skip ( MAXLEN
,& gwarea
, ' ' )< 0 || getword_limit ( full_url
, sizeof ( full_url
),& gwarea
, ' ' )< 0 ) {
170 debuga ( __FILE__
, __LINE__
, _ ( "Invalid url in file \" %s \"\n " ), wentp
);
174 if ( getword ( ip
, sizeof ( ip
),& gwarea
, '/' )< 0 ) {
175 debuga ( __FILE__
, __LINE__
, _ ( "Invalid source IP in file \" %s \"\n " ), wentp
);
179 if ( getword_skip ( MAXLEN
,& gwarea
, ' ' )< 0 || getword ( UserBuf
, sizeof ( UserBuf
),& gwarea
, ' ' )< 0 ) {
180 debuga ( __FILE__
, __LINE__
, _ ( "Invalid user in file \" %s \"\n " ), wentp
);
// The value written to the record is whatever process_url returns for the full url.
185 url
= process_url ( full_url
, false );
187 //sprintf(warea,"%04d%02d%02d",year,mon,day);
// Optional date filter: the entry date is encoded like dfrom and duntil before being compared with them.
189 if ( RedirectorFilterOutDate
) {
190 idata
= year
* 10000 + mon
* 100 + day
;
191 if ( idata
< dfrom
|| idata
> duntil
)
// The line is dropped when process_user reports anything but USERERR_NoError.
196 PUser
= process_user (& user
, ip
,& id_is_ip
);
197 if ( PUser
!= USERERR_NoError
) return ;
// Look the user up, and create it with no_report set when needed.
// NOTE(review): the test guarding the creation is not in this capture.
199 uinfo
= userinfo_find_from_id ( user
);
201 uinfo
= userinfo_create ( user
,( id_is_ip
) ? NULL
: ip
);
202 uinfo
-> no_report
= true ;
203 if ( Ip2Name
&& id_is_ip
) {
205 ip2name ( IpBuf
, sizeof ( IpBuf
));
208 user_find ( userlabel
, MAX_USER_LEN
, user
);
209 userinfo_label ( uinfo
, userlabel
);
// Record layout: user id, date as YYYYMMDD, hour, ip, url, then the rule made of source and/or list.
211 fprintf ( fp_ou
, "%s \t %04d%02d%02d \t %s \t %s \t %s \t " , uinfo
-> id
, year
, mon
, day
, hour
, ip
, url
);
212 if ( source
[ 0 ] && list
[ 0 ])
213 fprintf ( fp_ou
, "%s/%s \n " , source
, list
);
215 fprintf ( fp_ou
, "%s \n " , source
);
217 fprintf ( fp_ou
, "%s \n " , list
);
//! Read one redirector log file and pass each of its lines to parse_log.
//!
//! \param wentp The name of the log file to read.
//! \param fp_ou The output file handed over to parse_log.
//! \param dfrom First date of the reporting range, handed over to parse_log.
//! \param duntil Last date of the reporting range, handed over to parse_log.
//!
//! A file whose name is already stored in files_done is not read again: the function returns at once.
//! NOTE(review): the statements following each error message, and the one that presumably increments
//! nfiles_done before the realloc, are missing from this capture.
221 static void read_log ( const char * wentp
, FILE * fp_ou
, int dfrom
, int duntil
)
223 FileObject
* fp_in
= NULL
;
229 debuga ( __FILE__
, __LINE__
, _ ( "Reading redirector log file \" %s \"\n " ), wentp
);
232 /* With squidGuard, you can log groups in only one log file.
233 We must parse each log files only one time. Example :
235 domainlist porn/domains
240 domainlist aggressive/domains
241 urllist aggressive/urls
245 domainlist audio-video/domains
246 urllist audio-video/urls
// Stop here if that file name was already processed.
250 for ( i
= 0 ; i
< nfiles_done
; i
++)
251 if (! strcmp ( wentp
, files_done
[ i
])) return ;
// Remember the file name: the array is resized and a private copy of the name goes in its last slot.
254 files_done
= realloc ( files_done
, nfiles_done
* sizeof ( char *));
256 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to store the name of the new redirector log to be read - %s \n " ), strerror ( errno
));
259 files_done
[ nfiles_done
- 1 ] = strdup ( wentp
);
260 if (! files_done
[ nfiles_done
- 1 ]) {
261 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to store the name of the new redirector log to be read - %s \n " ), strerror ( errno
));
265 if (( fp_in
= FileObject_Open ( wentp
))== NULL
) {
266 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), wentp
, FileObject_GetLastOpenError ());
270 if (( line
= longline_create ())== NULL
) {
271 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to read file \" %s \"\n " ), wentp
);
// One call to parse_log per line returned by longline_read.
275 while (( buf
= longline_read ( fp_in
, line
)) != NULL
) {
276 parse_log ( fp_ou
, buf
, dfrom
, duntil
);
278 if ( FileObject_Close ( fp_in
)) {
279 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), wentp
, FileObject_GetLastCloseError ());
282 longline_destroy (& line
);
//! Read the redirector logs and produce the sorted intermediate file used by redirector_report.
//!
//! The entries are first written to redirector.int_unsort in the temporary directory. The logs are
//! either the files listed in RedirectorLogs or the files named by the "logdir" and "log" parameters
//! found in the file SquidGuardConf. When redirector_count is set, the unsorted file is sorted with
//! the external sort command into redirector.int_log (its name is kept in redirector_sorted) and the
//! unsorted file is deleted unless KeepTempLog is set.
//! NOTE(review): several lines of this function (braces, declarations, the statements following each
//! error message) are missing from this capture.
287 void redirector_log ( void )
289 FILE * fp_ou
= NULL
, * fp_guard
= NULL
;
291 char guard_in
[ MAXLEN
];
// Nothing to read if neither a squidGuard configuration file nor explicit logs were provided.
304 if ( SquidGuardConf
[ 0 ] == '\0' && NRedirectorLogs
== 0 ) {
305 if ( debugz
>= LogLevel_Process
) debugaz ( __FILE__
, __LINE__
, _ ( "No redirector logs provided to produce that kind of report \n " ));
309 snprintf ( guard_in
, sizeof ( guard_in
), "%s/redirector.int_unsort" , tmp
);
310 if (( fp_ou
= fopen ( guard_in
, "w" ))== NULL
) {
311 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
315 getperiod_torange (& period
,& dfrom
,& duntil
);
// The explicitly listed logs are read here.
317 if ( NRedirectorLogs
> 0 ) {
318 for ( i
= 0 ; i
< NRedirectorLogs
; i
++)
319 read_log ( RedirectorLogs
[ i
], fp_ou
, dfrom
, duntil
);
// NOTE(review): presumably the else branch; the log file names are then taken from the squidGuard configuration file.
321 if ( access ( SquidGuardConf
, R_OK
) != 0 ) {
322 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
326 if (( fp_guard
= fopen ( SquidGuardConf
, "r" ))== NULL
) {
327 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
332 while ( fgets ( buf
, sizeof ( buf
), fp_guard
)!= NULL
) {
334 if (( str
= get_param_value ( "logdir" , buf
))!= NULL
) {
336 We want to tolerate spaces inside the directory name but we must also
337 remove the trailing spaces left by the editor after the directory name.
338 This should not be a problem as nobody use a file name with trailing spaces.
// After this loop, y is the index of the last character to keep, or -1 if the value is empty or blank.
340 for ( y
= strlen ( str
)- 1 ; y
>= 0 && ( unsigned char ) str
[ y
]<= ' ' ; y
--);
// Compare as signed: against a size_t, -1 would convert to a huge value and be clamped to the buffer size.
341 if ( y
>= ( int ) sizeof ( logdir
)- 1 ) y
= sizeof ( logdir
)- 2 ;
347 } else if (( str
= get_param_value ( "log" , buf
))!= NULL
) {
348 if (( str2
= get_param_value ( "anonymous" , str
))!= NULL
)
352 If logdir is defined, we prepend it to the log file name, otherwise, we assume
353 the log directive provides an absolute file name to the log file. Therefore,
354 we don't need to add an additionnal / at the beginning of the log file name.
// y is the number of characters already stored in wentp.
356 y
=( logdir
[ 0 ]) ? sprintf ( wentp
, "%s/" , logdir
) : 0 ;
358 Spaces are allowed in the name of the log file. The file name ends at the first #
359 because it is assumed it is an end of line comment. Any space before the # is then
360 removed. Any control character (i.e. a character with a code lower than 32) ends
361 the file name. That includes the terminating zero.
363 while (( unsigned char )* str
>= ' ' && * str
!= '#' && y
< sizeof ( wentp
)- 1 )
367 while (* str
== ' ' && y
> 0 ) {
373 read_log ( wentp
, fp_ou
, dfrom
, duntil
);
376 if ( fclose ( fp_guard
)== EOF
) {
377 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), SquidGuardConf
, strerror ( errno
));
382 if ( fp_ou
&& fclose ( fp_ou
)== EOF
) {
383 debuga ( __FILE__
, __LINE__
, _ ( "Write error in \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
// Release the copies of the file names stored by read_log.
388 for ( y
= 0 ; y
< nfiles_done
; y
++)
389 if ( files_done
[ y
]) free ( files_done
[ y
]);
393 if ( redirector_count
) {
394 snprintf ( redirector_sorted
, sizeof ( redirector_sorted
), "%s/redirector.int_log" , tmp
);
396 debuga ( __FILE__
, __LINE__
, _ ( "Sorting file \" %s \"\n " ), redirector_sorted
);
// Sort keys: column 1 (user id), column 2 (date) and column 4 (ip) of the tab separated records.
399 if ( snprintf ( tmp6
, sizeof ( tmp6
), "sort -t \"\t\" -k 1,1 -k 2,2 -k 4,4 \" %s \" -o \" %s \" " , guard_in
, redirector_sorted
)>= sizeof ( tmp6
)) {
400 debuga ( __FILE__
, __LINE__
, _ ( "Sort command too long when sorting file \" %s \" to \" %s \"\n " ), guard_in
, redirector_sorted
);
403 cstatus
= system ( tmp6
);
404 if (! WIFEXITED ( cstatus
) || WEXITSTATUS ( cstatus
)) {
405 debuga ( __FILE__
, __LINE__
, _ ( "sort command return status %d \n " ), WEXITSTATUS ( cstatus
));
406 debuga ( __FILE__
, __LINE__
, _ ( "sort command: %s \n " ), tmp6
);
411 if (! KeepTempLog
&& unlink ( guard_in
)) {
412 debuga ( __FILE__
, __LINE__
, _ ( "Cannot delete \" %s \" : %s \n " ), guard_in
, strerror ( errno
));
//! Write a table row telling how many redirector entries were left out of the report.
//!
//! \param fp_ou The HTML report file.
//! \param count The number of entries not shown. It also selects the singular or plural message.
//!
//! NOTE(review): the declaration of the buffer named ignored is missing from this capture.
418 static void show_ignored_redirector ( FILE * fp_ou
, int count
)
// Build the localized message first, then emit it in the fourth cell of an otherwise empty row.
422 snprintf ( ignored
, sizeof ( ignored
), ngettext ( "%d more redirector entry not shown here…" , "%d more redirector entries not shown here…" , count
), count
);
423 fprintf ( fp_ou
, "<tr><td class= \" data \" ></td><td class= \" data \" ></td><td class= \" data \" ></td><td class= \" data2 more \" >%s</td><td class= \" data \" ></td></tr> \n " , ignored
);
//! Generate the HTML page redirector.html from the sorted file produced by redirector_log.
//!
//! Each record of the sorted file is split into user id, date (YYYYMMDD), time, ip, url and rule.
//! The page lists the entries in a table with the columns USERID, IP/NAME, DATE/TIME, ACCESSED SITE
//! and RULE. When SquidGuardReportLimit is set, the entries in excess for a user are replaced by a
//! single row written by show_ignored_redirector. A warning is appended if RedirectorErrors is not
//! zero. The sorted file is deleted at the end unless KeepTempLog is set.
//! NOTE(review): several lines of this function (braces, declarations, the statements following each
//! error message) are missing from this capture.
426 void redirector_report ( void )
428 FileObject
* fp_in
= NULL
;
447 struct getwordstruct gwarea
;
448 const struct userinfostruct
* uinfo
;
// No entry was counted: at most a debug message is produced here.
455 if (! redirector_count
) {
456 if ( debugz
>= LogLevel_Process
) {
457 if ( redirector_sorted
[ 0 ])
458 debugaz ( __FILE__
, __LINE__
, _ ( "Redirector report not generated because it is empty \n " ));
463 snprintf ( report
, sizeof ( report
), "%s/redirector.html" , outdirname
);
465 if (( fp_in
= FileObject_Open ( redirector_sorted
))== NULL
) {
466 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), redirector_sorted
, FileObject_GetLastOpenError ());
470 if (( fp_ou
= fopen ( report
, "w" ))== NULL
) {
471 debuga ( __FILE__
, __LINE__
, _ ( "Cannot open file \" %s \" : %s \n " ), report
, strerror ( errno
));
475 if (( line
= longline_create ())== NULL
) {
476 debuga ( __FILE__
, __LINE__
, _ ( "Not enough memory to read file \" %s \"\n " ), redirector_sorted
);
// Page header: title, reporting period, then the table with its column headings.
480 write_html_header ( fp_ou
,( IndexTree
== INDEX_TREE_DATE
) ? 3 : 1 , _ ( "Redirector report" ), HTML_JS_NONE
);
481 fputs ( "<tr><td class= \" header_c \" >" , fp_ou
);
482 fprintf ( fp_ou
, _ ( "Period: %s" ), period
. html
);
483 fputs ( "</td></tr> \n " , fp_ou
);
484 fprintf ( fp_ou
, "<tr><th class= \" header_c \" >%s</th></tr> \n " , _ ( "Redirector report" ));
485 close_html_header ( fp_ou
);
487 fputs ( "<div class= \" report \" ><table cellpadding=1 cellspacing=2> \n " , fp_ou
);
488 fprintf ( fp_ou
, "<tr><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th><th class= \" header_l \" >%s</th></tr> \n " , _ ( "USERID" ), _ ( "IP/NAME" ), _ ( "DATE/TIME" ), _ ( "ACCESSED SITE" ), _ ( "RULE" ));
// One table row per record of the sorted file. The fields are tab separated, the rule ends the line.
490 while (( buf
= longline_read ( fp_in
, line
))!= NULL
) {
491 getword_start (& gwarea
, buf
);
492 if ( getword ( user
, sizeof ( user
),& gwarea
, ' \t ' )< 0 ) {
493 debuga ( __FILE__
, __LINE__
, _ ( "Invalid user in file \" %s \"\n " ), redirector_sorted
);
496 if ( getword_atoll (& data2
,& gwarea
, ' \t ' )< 0 ) {
497 debuga ( __FILE__
, __LINE__
, _ ( "Invalid date in file \" %s \"\n " ), redirector_sorted
);
500 if ( getword ( hora
, sizeof ( hora
),& gwarea
, ' \t ' )< 0 ) {
501 debuga ( __FILE__
, __LINE__
, _ ( "Invalid time in file \" %s \"\n " ), redirector_sorted
);
504 if ( getword ( ip
, sizeof ( ip
),& gwarea
, ' \t ' )< 0 ) {
505 debuga ( __FILE__
, __LINE__
, _ ( "Invalid IP address in file \" %s \"\n " ), redirector_sorted
);
508 if ( getword_ptr ( buf
,& url
,& gwarea
, ' \t ' )< 0 ) {
509 debuga ( __FILE__
, __LINE__
, _ ( "Invalid url in file \" %s \"\n " ), redirector_sorted
);
512 if ( getword ( rule
, sizeof ( rule
),& gwarea
, ' \n ' )< 0 ) {
513 debuga ( __FILE__
, __LINE__
, _ ( "Invalid rule in file \" %s \"\n " ), redirector_sorted
);
517 uinfo
= userinfo_find_from_id ( user
);
519 debuga ( __FILE__
, __LINE__
, _ ( "Unknown user ID %s in file \" %s \"\n " ), user
, redirector_sorted
);
// The date is stored as YYYYMMDD: the month is made of the two middle digits, hence the modulo 100.
// A modulo 10 would keep only its last digit and turn October to December into 0 to 2.
523 computedate ( data2
/ 10000 ,( data2
/ 100 )% 100 , data2
% 100 ,& t
);
// The date is displayed in the format of the current locale.
524 strftime ( data
, sizeof ( data
), "%x" ,& t
);
531 if ( Ip2Name
&& ! uinfo
-> id_is_ip
) ip2name ( oname
, sizeof ( oname
));
// Detect a change of user or of ip address with respect to the previous record.
535 if ( strcmp ( ouser
, user
) != 0 ) {
539 if ( strcmp ( oip
, ip
) != 0 ) {
542 if ( Ip2Name
&& ! uinfo
-> id_is_ip
) ip2name ( oname
, sizeof ( oname
));
// Per user limit on the number of rows: ouser2 holds the label of the user being counted.
547 if ( SquidGuardReportLimit
) {
548 if ( strcmp ( ouser2
, uinfo
-> label
) == 0 ) {
551 if ( count
> SquidGuardReportLimit
&& SquidGuardReportLimit
> 0 )
552 show_ignored_redirector ( fp_ou
, count
- SquidGuardReportLimit
);
554 strcpy ( ouser2
, uinfo
-> label
);
556 if ( count
> SquidGuardReportLimit
)
// Either the user label and ip are written, or the two first cells are left empty.
561 fprintf ( fp_ou
, "<tr><td class= \" data2 \" >%s</td><td class= \" data2 \" >%s</td>" , uinfo
-> label
, ip
);
563 fputs ( "<tr><td class= \" data2 \" ></td><td class= \" data2 \" ></td>" , fp_ou
);
564 fprintf ( fp_ou
, "<td class= \" data2 \" >%s-%s</td><td class= \" data2 \" >" , data
, hora
);
565 output_html_link ( fp_ou
, url
, 100 );
566 fprintf ( fp_ou
, "</td><td class= \" data2 \" >%s</td></tr> \n " , rule
);
568 if ( FileObject_Close ( fp_in
)) {
569 debuga ( __FILE__
, __LINE__
, _ ( "Read error in \" %s \" : %s \n " ), redirector_sorted
, FileObject_GetLastCloseError ());
572 longline_destroy (& line
);
// Entries in excess for the last user of the file.
574 if ( count
> SquidGuardReportLimit
&& SquidGuardReportLimit
> 0 )
575 show_ignored_redirector ( fp_ou
, count
- SquidGuardReportLimit
);
577 fputs ( "</table> \n " , fp_ou
);
// Warn the reader when some log lines could not be parsed.
579 if ( RedirectorErrors
> 0 )
581 fputs ( "<div class= \" warn \" ><span>" , fp_ou
);
582 fprintf ( fp_ou
, ngettext ( "%d error found in the log file. Some entries may be missing." , "%d errors found in the log file. Some entries may be missing." , RedirectorErrors
), RedirectorErrors
);
583 fputs ( "</span></div> \n " , fp_ou
);
586 fputs ( "</div> \n " , fp_ou
);
587 write_html_trailer ( fp_ou
);
588 if ( fclose ( fp_ou
)== EOF
) {
589 debuga ( __FILE__
, __LINE__
, _ ( "Write error in \" %s \" : %s \n " ), report
, strerror ( errno
));
593 if (! KeepTempLog
&& unlink ( redirector_sorted
)) {
594 debuga ( __FILE__
, __LINE__
, _ ( "Cannot delete \" %s \" : %s \n " ), redirector_sorted
, strerror ( errno
));