]> git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
Fix wrong tags label.
[thirdparty/sarg.git] / squidguard_log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Number of entries stored in files_done. */
static int nfiles_done;
/*
 * Names of the redirector log files already handed to read_log().
 * The array and each name are heap allocated by read_log() and released by squidguard_log().
 */
static char **files_done;
32
33
34 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
35 {
36 FILE *fp_in = NULL;
37 char *buf;
38 char leks[5], sep[2], res[MAXLEN];
39 char hour[15];
40 char source[128], list[128];
41 char url[MAX_URL_LEN];
42 char user[MAX_USER_LEN];
43 char ip[25];
44 long long int lmon, lday, lyear;
45 int mon, day, year;
46 int idata=0;
47 int i;
48 char *str;
49 bool id_is_ip;
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
52 struct userinfostruct *uinfo;
53 longline line;
54
55 if(debug) {
56 debuga(_("Reading redirector log file %s\n"),wentp);
57 }
58
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log files only one time. Example :
61 dest porn {
62 domainlist porn/domains
63 urllist porn/urls
64 log file1.log
65 }
66 dest aggressive {
67 domainlist aggressive/domains
68 urllist aggressive/urls
69 log file2.log
70 }
71 dest audio-video {
72 domainlist audio-video/domains
73 urllist audio-video/urls
74 log file1.log
75 }
76 */
77 for (i=0; i<nfiles_done; i++)
78 if (!strcmp(wentp, files_done[i])) return;
79
80 nfiles_done++;
81 files_done = realloc(files_done, nfiles_done*sizeof(char *));
82 if (!files_done) {
83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
84 exit(EXIT_FAILURE);
85 }
86 files_done[nfiles_done-1] = strdup(wentp);
87 if (!files_done[nfiles_done-1]) {
88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
89 exit(EXIT_FAILURE);
90 }
91
92 if ((fp_in=fopen(wentp,"r"))==NULL) {
93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
94 exit(EXIT_FAILURE);
95 }
96
97 if ((line=longline_create())==NULL) {
98 debuga(_("Not enough memory to read the redirector log\n"));
99 exit(EXIT_FAILURE);
100 }
101
102 while ((buf=longline_read(fp_in,line)) != NULL) {
103 getword_start(&gwarea,buf);
104 if(RedirectorLogFormat[0] != '\0') {
105 getword_start(&gwarea1,RedirectorLogFormat);
106 leks[0]='\0';
107 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
109 exit(EXIT_FAILURE);
110 }
111 year=0;
112 mon=0;
113 day=0;
114 hour[0]='\0';
115 source[0]='\0';
116 list[0]='\0';
117 ip[0]='\0';
118 user[0]='\0';
119 url[0]='\0';
120 while(strcmp(leks,"end") != 0) {
121 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
122 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
123 exit(EXIT_FAILURE);
124 }
125 if(strcmp(leks,"end") != 0) {
126 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
127 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
128 exit(EXIT_FAILURE);
129 }
130 if(strcmp(leks,"year") == 0) {
131 year=atoi(res);
132 } else if(strcmp(leks,"mon") == 0) {
133 mon=atoi(res);
134 } else if(strcmp(leks,"day") == 0) {
135 day=atoi(res);
136 } else if(strcmp(leks,"hour") == 0) {
137 if (strlen(res)>=sizeof(hour)) {
138 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
139 exit(EXIT_FAILURE);
140 }
141 strcpy(hour,res);
142 } else if(strcmp(leks,"source") == 0) {
143 if (strlen(res)>=sizeof(source)) {
144 debuga(_("Banning source name too long in redirector log file %s\n"),wentp);
145 exit(EXIT_FAILURE);
146 }
147 strcpy(source,res);
148 } else if(strcmp(leks,"list") == 0) {
149 if (strlen(res)>=sizeof(list)) {
150 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
151 exit(EXIT_FAILURE);
152 }
153 strcpy(list,res);
154 } else if(strcmp(leks,"ip") == 0) {
155 if (strlen(res)>=sizeof(ip)) {
156 debuga(_("IP address too long in redirector log file %s\n"),wentp);
157 exit(EXIT_FAILURE);
158 }
159 strcpy(ip,res);
160 } else if(strcmp(leks,"user") == 0) {
161 if (strlen(res)>=sizeof(user)) {
162 debuga(_("User ID too long in redirector log file %s\n"),wentp);
163 exit(EXIT_FAILURE);
164 }
165 strcpy(user,res);
166 } else if(strcmp(leks,"url") == 0) {
167 if (strlen(res)>=sizeof(url)) {
168 debuga(_("URL too long in redirector log file %s\n"),wentp);
169 exit(EXIT_FAILURE);
170 }
171 strcpy(url,res);
172 }
173 }
174 }
175 } else {
176 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
177 getword_atoll(&lday,&gwarea,' ')<0) {
178 debuga(_("Invalid date found in file %s\n"),wentp);
179 exit(EXIT_FAILURE);
180 }
181 year=(int)lyear;
182 mon=(int)lmon;
183 day=(int)lday;
184 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
185 debuga(_("Invalid time found in file %s\n"),wentp);
186 exit(EXIT_FAILURE);
187 }
188 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
189 debuga(_("Invalid redirected source in file %s\n"),wentp);
190 exit(EXIT_FAILURE);
191 }
192 if (getword(list,sizeof(list),&gwarea,'/')<0) {
193 debuga(_("Invalid redirected list in file %s\n"),wentp);
194 exit(EXIT_FAILURE);
195 }
196 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(url,sizeof(url),&gwarea,' ')<0) {
197 debuga(_("Invalid URL in file %s\n"),wentp);
198 exit(EXIT_FAILURE);
199 }
200 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
201 debuga(_("Invalid source IP in file %s\n"),wentp);
202 exit(EXIT_FAILURE);
203 }
204 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
205 debuga(_("Invalid user in file %s\n"),wentp);
206 exit(EXIT_FAILURE);
207 }
208 /*
209 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
210 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
211 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
212 the URL is not normalized).
213 */
214 str=strchr(url,'/');
215 if (str) {
216 if (str[1]=='/') {
217 str+=2;
218 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
219 url[i]='\0';
220 } else {
221 *str='\0';
222 }
223 }
224 }
225
226 //sprintf(warea,"%04d%02d%02d",year,mon,day);
227
228 if(RedirectorFilterOutDate) {
229 idata = year*10000+mon*100+day;
230 if(idata < dfrom || idata > duntil)
231 continue;
232 }
233
234 if(UserIp) {
235 strcpy(user,ip);
236 id_is_ip=true;
237 } else {
238 id_is_ip=false;
239 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
240 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
241 strcpy(user,ip);
242 id_is_ip=true;
243 }
244 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
245 continue;
246 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
247 strcpy(user,"everybody");
248 }
249 }
250 uinfo=userinfo_find_from_id(user);
251 if (!uinfo) {
252 uinfo=userinfo_create(user);
253 uinfo->id_is_ip=id_is_ip;
254 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
255 user_find(uinfo->label,MAX_USER_LEN, user);
256 }
257 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo->id,year,mon,day,hour,ip,url);
258 if (source[0] && list[0])
259 fprintf(fp_ou,"%s/%s\n",source,list);
260 else if (source[0])
261 fprintf(fp_ou,"%s\n",source);
262 else
263 fprintf(fp_ou,"%s\n",list);
264 squidguard_count++;
265 }
266 fclose(fp_in);
267 longline_destroy(&line);
268 return;
269 }
270
271
272 void squidguard_log(void)
273 {
274 FILE *fp_ou = NULL, *fp_guard = NULL;
275 char buf[MAXLEN];
276 char guard_in[MAXLEN];
277 char guard_ou[MAXLEN];
278 char logdir[MAXLEN];
279 char user[MAXLEN];
280 char tmp6[MAXLEN];
281 int i;
282 int y;
283 int cstatus;
284 int dfrom, duntil;
285 char *str;
286 char *str2;
287
288 str2 = user;
289
290 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
291 return;
292
293 sprintf(guard_in,"%s/redirector.unsort",tmp);
294 sprintf(guard_ou,"%s/redirector.log",tmp);
295 if((fp_ou=fopen(guard_in,"a"))==NULL) {
296 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
297 exit(EXIT_FAILURE);
298 }
299
300 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
301 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
302
303 if (NRedirectorLogs>0) {
304 for (i=0 ; i<NRedirectorLogs ; i++)
305 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
306 } else {
307 if(access(SquidGuardConf, R_OK) != 0) {
308 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
309 exit(EXIT_FAILURE);
310 }
311
312 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
313 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
314 exit(EXIT_FAILURE);
315 }
316
317 logdir[0]=0;
318 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
319 fixendofline(buf);
320 if((str=get_param_value("logdir",buf))!=NULL) {
321 /*
322 We want to tolerate spaces inside the directory name but we must also
323 remove the trailing spaces left by the editor after the directory name.
324 This should not be a problem as nobody use a file name with trailing spaces.
325 */
326 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
327 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
328 logdir[y+1] = '\0';
329 while (y>=0) {
330 logdir[y] = str[y];
331 y--;
332 }
333 } else if((str=get_param_value("log",buf))!=NULL) {
334 if((str2=get_param_value("anonymous",str))!=NULL)
335 str=str2;
336
337 /*
338 If logdir is defined, we prepend it to the log file name, otherwise, we assume
339 the log directive provides an absolute file name to the log file. Therefore,
340 we don't need to add an additionnal / at the beginning of the log file name.
341 */
342 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
343 /*
344 Spaces are allowed in the name of the log file. The file name ends at the first #
345 because it is assumed it is an end of line comment. Any space before the # is then
346 removed. Any control character (i.e. a character with a code lower than 32) ends
347 the file name. That includes the terminating zero.
348 */
349 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
350 wentp[y++]=*str++;
351 if(*str=='#') {
352 str--;
353 while(*str==' ' && y>0) {
354 str--;
355 y--;
356 }
357 }
358 wentp[y]=0;
359 read_log(wentp,fp_ou,dfrom,duntil);
360 }
361 }
362 }
363
364 if (fp_guard) fclose(fp_guard);
365 if (fp_ou) fclose(fp_ou);
366
367 if (files_done) {
368 for (y=0; y<nfiles_done; y++)
369 if (files_done[y]) free(files_done[y]);
370 free(files_done);
371 }
372
373 if(debug) {
374 debuga(_("Sorting file: %s\n"),guard_ou);
375 }
376
377 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
378 cstatus=system(tmp6);
379 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
380 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
381 debuga(_("sort command: %s\n"),tmp6);
382 exit(EXIT_FAILURE);
383 }
384
385 unlink(guard_in);
386 return;
387 }