]> git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
Add the pt_BR translation
[thirdparty/sarg.git] / squidguard_log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2011
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Number of entries currently stored in files_done. */
static int nfiles_done = 0;
/* Names of the redirector log files already processed by read_log(). */
static char **files_done = NULL;
32
33
34 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
35 {
36 FILE *fp_in = NULL;
37 char *buf;
38 char leks[5], sep[2], res[MAXLEN];
39 char hour[15];
40 char source[128], list[128];
41 char url[MAX_URL_LEN];
42 char user[MAX_USER_LEN];
43 char ip[45];
44 long long int lmon, lday, lyear;
45 int mon, day, year;
46 int idata=0;
47 int i;
48 char *str;
49 bool id_is_ip;
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
52 struct userinfostruct *uinfo;
53 longline line;
54
55 if(debug) {
56 debuga(_("Reading redirector log file %s\n"),wentp);
57 }
58
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log files only one time. Example :
61 dest porn {
62 domainlist porn/domains
63 urllist porn/urls
64 log file1.log
65 }
66 dest aggressive {
67 domainlist aggressive/domains
68 urllist aggressive/urls
69 log file2.log
70 }
71 dest audio-video {
72 domainlist audio-video/domains
73 urllist audio-video/urls
74 log file1.log
75 }
76 */
77 for (i=0; i<nfiles_done; i++)
78 if (!strcmp(wentp, files_done[i])) return;
79
80 nfiles_done++;
81 files_done = realloc(files_done, nfiles_done*sizeof(char *));
82 if (!files_done) {
83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
84 exit(EXIT_FAILURE);
85 }
86 files_done[nfiles_done-1] = strdup(wentp);
87 if (!files_done[nfiles_done-1]) {
88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
89 exit(EXIT_FAILURE);
90 }
91
92 if ((fp_in=fopen(wentp,"r"))==NULL) {
93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
94 exit(EXIT_FAILURE);
95 }
96
97 if ((line=longline_create())==NULL) {
98 debuga(_("Not enough memory to read the redirector log\n"));
99 exit(EXIT_FAILURE);
100 }
101
102 while ((buf=longline_read(fp_in,line)) != NULL) {
103 getword_start(&gwarea,buf);
104 if(RedirectorLogFormat[0] != '\0') {
105 getword_start(&gwarea1,RedirectorLogFormat);
106 leks[0]='\0';
107 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
109 exit(EXIT_FAILURE);
110 }
111 year=0;
112 mon=0;
113 day=0;
114 hour[0]='\0';
115 source[0]='\0';
116 list[0]='\0';
117 ip[0]='\0';
118 user[0]='\0';
119 url[0]='\0';
120 while(strcmp(leks,"end") != 0) {
121 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
122 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
123 exit(EXIT_FAILURE);
124 }
125 if(strcmp(leks,"end") != 0) {
126 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
127 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
128 exit(EXIT_FAILURE);
129 }
130 if(strcmp(leks,"year") == 0) {
131 year=atoi(res);
132 } else if(strcmp(leks,"mon") == 0) {
133 mon=atoi(res);
134 } else if(strcmp(leks,"day") == 0) {
135 day=atoi(res);
136 } else if(strcmp(leks,"hour") == 0) {
137 if (strlen(res)>=sizeof(hour)) {
138 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
139 exit(EXIT_FAILURE);
140 }
141 strcpy(hour,res);
142 } else if(strcmp(leks,"source") == 0) {
143 if (strlen(res)>=sizeof(source)) {
144 debuga(_("Banning source name too long in redirector log file %s\n"),wentp);
145 exit(EXIT_FAILURE);
146 }
147 strcpy(source,res);
148 } else if(strcmp(leks,"list") == 0) {
149 if (strlen(res)>=sizeof(list)) {
150 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
151 exit(EXIT_FAILURE);
152 }
153 strcpy(list,res);
154 } else if(strcmp(leks,"ip") == 0) {
155 if (strlen(res)>=sizeof(ip)) {
156 debuga(_("IP address too long in redirector log file %s\n"),wentp);
157 exit(EXIT_FAILURE);
158 }
159 strcpy(ip,res);
160 } else if(strcmp(leks,"user") == 0) {
161 if (strlen(res)>=sizeof(user)) {
162 debuga(_("User ID too long in redirector log file %s\n"),wentp);
163 exit(EXIT_FAILURE);
164 }
165 strcpy(user,res);
166 } else if(strcmp(leks,"url") == 0) {
167 if (strlen(res)>=sizeof(url)) {
168 debuga(_("URL too long in redirector log file %s\n"),wentp);
169 exit(EXIT_FAILURE);
170 }
171 strcpy(url,res);
172 }
173 }
174 }
175 } else {
176 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
177 getword_atoll(&lday,&gwarea,' ')<0) {
178 debuga(_("Invalid date found in file %s\n"),wentp);
179 exit(EXIT_FAILURE);
180 }
181 year=(int)lyear;
182 mon=(int)lmon;
183 day=(int)lday;
184 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
185 debuga(_("Invalid time found in file %s\n"),wentp);
186 exit(EXIT_FAILURE);
187 }
188 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
189 debuga(_("Invalid redirected source in file %s\n"),wentp);
190 exit(EXIT_FAILURE);
191 }
192 if (getword(list,sizeof(list),&gwarea,'/')<0) {
193 debuga(_("Invalid redirected list in file %s\n"),wentp);
194 exit(EXIT_FAILURE);
195 }
196 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(url,sizeof(url),&gwarea,' ')<0) {
197 debuga(_("Invalid URL in file %s\n"),wentp);
198 exit(EXIT_FAILURE);
199 }
200 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
201 debuga(_("Invalid source IP in file %s\n"),wentp);
202 exit(EXIT_FAILURE);
203 }
204 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
205 debuga(_("Invalid user in file %s\n"),wentp);
206 exit(EXIT_FAILURE);
207 }
208 /*
209 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
210 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
211 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
212 the URL is not normalized).
213 */
214 str=strchr(url,'/');
215 if (str) {
216 if (str[1]=='/') {
217 str+=2;
218 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
219 url[i]='\0';
220 } else {
221 *str='\0';
222 }
223 }
224 }
225
226 //sprintf(warea,"%04d%02d%02d",year,mon,day);
227
228 if(RedirectorFilterOutDate) {
229 idata = year*10000+mon*100+day;
230 if(idata < dfrom || idata > duntil)
231 continue;
232 }
233
234 if(UserIp) {
235 strcpy(user,ip);
236 id_is_ip=true;
237 } else {
238 id_is_ip=false;
239 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
240 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
241 strcpy(user,ip);
242 id_is_ip=true;
243 }
244 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
245 continue;
246 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
247 strcpy(user,"everybody");
248 }
249 }
250 uinfo=userinfo_find_from_id(user);
251 if (!uinfo) {
252 uinfo=userinfo_create(user);
253 uinfo->id_is_ip=id_is_ip;
254 uinfo->no_report=true;
255 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
256 user_find(uinfo->label,MAX_USER_LEN, user);
257 }
258 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo->id,year,mon,day,hour,ip,url);
259 if (source[0] && list[0])
260 fprintf(fp_ou,"%s/%s\n",source,list);
261 else if (source[0])
262 fprintf(fp_ou,"%s\n",source);
263 else
264 fprintf(fp_ou,"%s\n",list);
265 squidguard_count++;
266 }
267 fclose(fp_in);
268 longline_destroy(&line);
269 return;
270 }
271
272
273 void squidguard_log(void)
274 {
275 FILE *fp_ou = NULL, *fp_guard = NULL;
276 char buf[MAXLEN];
277 char guard_in[MAXLEN];
278 char guard_ou[MAXLEN];
279 char logdir[MAXLEN];
280 char user[MAXLEN];
281 char tmp6[MAXLEN];
282 int i;
283 int y;
284 int cstatus;
285 int dfrom, duntil;
286 char *str;
287 char *str2;
288
289 str2 = user;
290
291 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
292 return;
293
294 sprintf(guard_in,"%s/redirector.unsort",tmp);
295 sprintf(guard_ou,"%s/redirector.log",tmp);
296 if((fp_ou=fopen(guard_in,"a"))==NULL) {
297 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
298 exit(EXIT_FAILURE);
299 }
300
301 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
302 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
303
304 if (NRedirectorLogs>0) {
305 for (i=0 ; i<NRedirectorLogs ; i++)
306 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
307 } else {
308 if(access(SquidGuardConf, R_OK) != 0) {
309 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
310 exit(EXIT_FAILURE);
311 }
312
313 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
314 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
315 exit(EXIT_FAILURE);
316 }
317
318 logdir[0]=0;
319 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
320 fixendofline(buf);
321 if((str=get_param_value("logdir",buf))!=NULL) {
322 /*
323 We want to tolerate spaces inside the directory name but we must also
324 remove the trailing spaces left by the editor after the directory name.
325 This should not be a problem as nobody use a file name with trailing spaces.
326 */
327 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
328 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
329 logdir[y+1] = '\0';
330 while (y>=0) {
331 logdir[y] = str[y];
332 y--;
333 }
334 } else if((str=get_param_value("log",buf))!=NULL) {
335 if((str2=get_param_value("anonymous",str))!=NULL)
336 str=str2;
337
338 /*
339 If logdir is defined, we prepend it to the log file name, otherwise, we assume
340 the log directive provides an absolute file name to the log file. Therefore,
341 we don't need to add an additionnal / at the beginning of the log file name.
342 */
343 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
344 /*
345 Spaces are allowed in the name of the log file. The file name ends at the first #
346 because it is assumed it is an end of line comment. Any space before the # is then
347 removed. Any control character (i.e. a character with a code lower than 32) ends
348 the file name. That includes the terminating zero.
349 */
350 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
351 wentp[y++]=*str++;
352 if(*str=='#') {
353 str--;
354 while(*str==' ' && y>0) {
355 str--;
356 y--;
357 }
358 }
359 wentp[y]=0;
360 read_log(wentp,fp_ou,dfrom,duntil);
361 }
362 }
363 }
364
365 if (fp_guard) fclose(fp_guard);
366 if (fp_ou) fclose(fp_ou);
367
368 if (files_done) {
369 for (y=0; y<nfiles_done; y++)
370 if (files_done[y]) free(files_done[y]);
371 free(files_done);
372 }
373
374 if(debug) {
375 debuga(_("Sorting file: %s\n"),guard_ou);
376 }
377
378 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
379 cstatus=system(tmp6);
380 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
381 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
382 debuga(_("sort command: %s\n"),tmp6);
383 exit(EXIT_FAILURE);
384 }
385
386 if (unlink(guard_in)) {
387 debuga(_("Cannot delete %s - %s\n"),guard_in,strerror(errno));
388 exit(EXIT_FAILURE);
389 }
390 return;
391 }