]>
git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
30 static char **files_done
= NULL
;
31 static int nfiles_done
= 0;
34 static void read_log(const char *wentp
, FILE *fp_ou
)
37 char leks
[5], sep
[2], res
[MAXLEN
];
38 char mon
[20], day
[3], year
[5], hour
[15];
41 char url
[MAX_URL_LEN
];
42 char user
[MAX_USER_LEN
];
46 struct getwordstruct gwarea
;
47 struct getwordstruct gwarea1
;
50 getword_start(&gwarea
,_("Reading access log file"));
51 if (getword(url
,sizeof(url
),&gwarea
,' ')<0 || getword_skip(MAXLEN
,&gwarea
,' ')<0) {
52 printf("SARG: Maybe you have a broken record or garbage in your %s string.\n",_("Reading access log file"));
55 debuga("%s squidGuard %s: %s\n",url
,gwarea
.current
,wentp
);
58 /* With squidGuard, you can log groups in only one log file.
59 We must parse each log file only once. Example:
61 domainlist porn/domains
66 domainlist aggressive/domains
67 urllist aggressive/urls
71 domainlist audio-video/domains
72 urllist audio-video/urls
76 for (i
=0; i
<nfiles_done
; i
++)
77 if (!strcmp(wentp
, files_done
[i
])) return;
80 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
82 perror("parse squidGuard - realloc");
85 files_done
[nfiles_done
-1] = strdup(wentp
);
86 if (!files_done
[nfiles_done
-1]) {
87 perror("parse squidGuard - strdup");
91 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
92 fprintf(stderr
, "SARG: (squidguard) %s: %s\n",_("Cannot open log file"),wentp
);
96 while (fgets(buf
,sizeof(buf
),fp_in
) != NULL
) {
97 getword_start(&gwarea
,buf
);
98 if(SquidGuardLogFormat
[0] != '\0') {
99 getword_start(&gwarea1
,SquidGuardLogFormat
);
101 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
102 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
105 while(strcmp(leks
,"end") != 0) {
106 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0 || getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
107 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
110 if(strcmp(leks
,"end") != 0) {
111 if (getword(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
112 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
115 if(strcmp(leks
,"year") == 0) {
116 if (strlen(res
)>=sizeof(year
)) {
117 debuga(_("Year string too long in squidGuard log file %s\n"),wentp
);
121 } else if(strcmp(leks
,"mon") == 0) {
122 if (strlen(res
)>=sizeof(mon
)) {
123 debuga(_("Month string too long in squidGuard log file %s\n"),wentp
);
127 } else if(strcmp(leks
,"day") == 0) {
128 if (strlen(res
)>=sizeof(day
)) {
129 debuga(_("Day string too long in squidGuard log file %s\n"),wentp
);
133 } else if(strcmp(leks
,"hour") == 0) {
134 if (strlen(res
)>=sizeof(hour
)) {
135 debuga(_("Hour string too long in squidGuard log file %s\n"),wentp
);
139 } else if(strcmp(leks
,"list") == 0) {
140 if (strlen(res
)>=sizeof(list
)) {
141 debuga(_("Banning list name too long in squidGuard log file %s\n"),wentp
);
145 } else if(strcmp(leks
,"ip") == 0) {
146 if (strlen(res
)>=sizeof(ip
)) {
147 debuga(_("IP address too long in squidGuard log file %s\n"),wentp
);
151 } else if(strcmp(leks
,"user") == 0) {
152 if (strlen(res
)>=sizeof(user
)) {
153 debuga(_("User ID too long in squidGuard log file %s\n"),wentp
);
157 } else if(strcmp(leks
,"url") == 0) {
158 if (strlen(res
)>=sizeof(url
)) {
159 debuga(_("URL too long in squidGuard log file %s\n"),wentp
);
167 if (getword(year
,sizeof(year
),&gwarea
,'-')<0 || getword(mon
,sizeof(mon
),&gwarea
,'-')<0 ||
168 getword(day
,sizeof(day
),&gwarea
,' ')<0 || getword(hour
,sizeof(hour
),&gwarea
,' ')<0 ||
169 getword_skip(MAXLEN
,&gwarea
,'/')<0 || getword(list
,sizeof(list
),&gwarea
,'/')<0 ||
170 getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(url
,sizeof(url
),&gwarea
,' ')<0 ||
171 getword(ip
,sizeof(ip
),&gwarea
,'/')<0 || getword_skip(MAXLEN
,&gwarea
,' ')<0 ||
172 getword(user
,sizeof(user
),&gwarea
,' ')<0) {
173 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
177 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
178 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
179 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
180 the URL is not normalized).
186 for (i
=0 ; *str
&& *str
!='/' ; i
++) url
[i
]=*str
++;
194 sprintf(warea
,"%s%s%s",year
,mon
,day
);
195 sprintf(wdata
,"%s%s%s",year
,mon
,day
);
198 if(SquidguardIgnoreDate
) {
199 if(idata
< dfrom
|| idata
> duntil
)
203 if (strcmp(user
,"-") == 0) {
207 fprintf(fp_ou
,"%s\t%s%s%s\t%s\t%s\t%s\t%s\n",user
,year
,mon
,day
,hour
,ip
,url
,list
);
210 if (fp_in
) fclose(fp_in
);
215 void squidguard_log(void)
217 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
218 char guard_in
[MAXLEN
];
219 char guard_ou
[MAXLEN
];
221 char year
[10], day
[10], mon
[10];
231 if(strlen(SquidGuardConf
) < 1 && strlen(SquidGuardLogAlternate
) < 1)
234 if (SquidGuardLogAlternate
[0] != '\0')
235 SquidGuardConf
[0]='\0';
237 sprintf(guard_in
,"%s/squidguard.unsort",tmp
);
238 sprintf(guard_ou
,"%s/squidguard.log",tmp
);
239 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
240 fprintf(stderr
, "SARG: (squidguard) %s: %s\n",_("Cannot open log file"),guard_in
);
248 if(SquidguardIgnoreDate
) {
249 if(strcmp(df
,"e") == 0) {
250 strncpy(day
,period
,2);
251 strncpy(mon
,period
+2,3);
252 strncpy(year
,period
+5,4);
254 sprintf(warea
,"%s%s%s",year
,mon
,day
);
256 strncpy(day
,period
+10,2);
257 strncpy(mon
,period
+12,3);
258 strncpy(year
,period
+15,4);
260 sprintf(warea
,"%s%s%s",year
,mon
,day
);
263 strncpy(day
,period
+7,2);
264 strncpy(mon
,period
+4,3);
265 strncpy(year
,period
,4);
267 sprintf(warea
,"%s%s%s",year
,mon
,day
);
269 strncpy(day
,period
+17,2);
270 strncpy(mon
,period
+14,3);
271 strncpy(year
,period
+10,4);
273 sprintf(warea
,"%s%s%s",year
,mon
,day
);
278 if(SquidGuardConf
[0] != 0) {
279 if(access(SquidGuardConf
, R_OK
) != 0) {
280 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
284 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
285 fprintf(stderr
, "SARG: (squidguard) %s: %s\n",_("Cannot open log file"),SquidGuardConf
);
290 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
292 if((str
=get_param_value("logdir",buf
))!=NULL
) {
294 We want to tolerate spaces inside the directory name but we must also
295 remove the trailing spaces left by the editor after the directory name.
296 This should not be a problem as nobody uses a file name with trailing spaces.
298 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
299 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
305 } else if((str
=get_param_value("log",buf
))!=NULL
) {
306 if((str2
=get_param_value("anonymous",str
))!=NULL
)
310 If logdir is defined, we prepend it to the log file name, otherwise, we assume
311 the log directive provides an absolute file name to the log file. Therefore,
312 we don't need to add an additional / at the beginning of the log file name.
314 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
316 Spaces are allowed in the name of the log file. The file name ends at the first #
317 because it is assumed it is an end of line comment. Any space before the # is then
318 removed. Any control character (i.e. a character with a code lower than 32) ends
319 the file name. That includes the terminating zero.
321 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
325 while(*str
==' ' && y
>0) {
331 read_log(wentp
,fp_ou
);
335 sprintf(wentp
,"%s",SquidGuardLogAlternate
);
336 read_log(wentp
,fp_ou
);
339 if (fp_guard
) fclose(fp_guard
);
340 if (fp_ou
) fclose(fp_ou
);
343 for (y
=0; y
<nfiles_done
; y
++)
344 if (files_done
[y
]) free(files_done
[y
]);
349 debuga(_("Sorting file: %s\n"),guard_ou
);
352 sprintf(tmp6
,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, guard_ou
);
353 cstatus
=system(tmp6
);
354 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
355 fprintf(stderr
, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus
));
356 fprintf(stderr
, "SARG: sort command: %s\n",tmp6
);