]> git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
Use one function to parse the lines from sarg-general instead of multiple codes scatt...
[thirdparty/sarg.git] / squidguard_log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/*
List of the squidGuard log files already parsed by read_log(). Each entry is a
copy of the file name made with strdup(); the array itself grows with realloc().
Both are released at the end of squidguard_log().
*/
static char **files_done = NULL;
/* Number of entries stored in files_done. */
static int nfiles_done = 0;
32
33
34 static void read_log(const char *wentp, FILE *fp_ou)
35 {
36 FILE *fp_in = NULL;
37 char leks[5], sep[2], res[MAXLEN];
38 char mon[20], day[3], year[5], hour[15];
39 char list[MAXLEN];
40 char wdata[127];
41 char url[MAX_URL_LEN];
42 char user[MAX_USER_LEN];
43 int idata=0;
44 int i;
45 char *str;
46 struct getwordstruct gwarea;
47 struct getwordstruct gwarea1;
48
49 if(debug) {
50 getword_start(&gwarea,text[7]);
51 if (getword(url,sizeof(url),&gwarea,' ')<0 || getword_skip(MAXLEN,&gwarea,' ')<0) {
52 printf("SARG: Maybe you have a broken record or garbage in your %s string.\n",text[7]);
53 exit(1);
54 }
55 debuga("%s squidGuard %s: %s",url,gwarea.current,wentp);
56 }
57
58 /* With squidGuard, you can log groups in only one log file.
59 We must parse each log files only one time. Example :
60 dest porn {
61 domainlist porn/domains
62 urllist porn/urls
63 log file1.log
64 }
65 dest aggressive {
66 domainlist aggressive/domains
67 urllist aggressive/urls
68 log file2.log
69 }
70 dest audio-video {
71 domainlist audio-video/domains
72 urllist audio-video/urls
73 log file1.log
74 }
75 */
76 for (i=0; i<nfiles_done; i++)
77 if (!strcmp(wentp, files_done[i])) return;
78
79 nfiles_done++;
80 files_done = realloc(files_done, nfiles_done*sizeof(char *));
81 if (!files_done) {
82 perror("parse squidGuard - realloc");
83 exit(EXIT_FAILURE);
84 }
85 files_done[nfiles_done-1] = strdup(wentp);
86 if (!files_done[nfiles_done-1]) {
87 perror("parse squidGuard - strdup");
88 exit(EXIT_FAILURE);
89 }
90
91 if ((fp_in=fopen(wentp,"r"))==NULL) {
92 fprintf(stderr, "SARG: (squidguard) %s: %s\n",text[8],wentp);
93 exit(1);
94 }
95
96 while (fgets(buf,sizeof(buf),fp_in) != NULL) {
97 getword_start(&gwarea,buf);
98 if(SquidGuardLogFormat[0] != '\0') {
99 getword_start(&gwarea1,SquidGuardLogFormat);
100 leks[0]='\0';
101 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
102 debuga(_("Maybe you have a broken record or garbage in your %s file."),wentp);
103 exit(1);
104 }
105 while(strcmp(leks,"end") != 0) {
106 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
107 debuga(_("Maybe you have a broken record or garbage in your %s file."),wentp);
108 exit(1);
109 }
110 if(strcmp(leks,"end") != 0) {
111 if (getword(res,sizeof(res),&gwarea,sep[0])<0) {
112 debuga(_("Maybe you have a broken record or garbage in your %s file."),wentp);
113 exit(1);
114 }
115 if(strcmp(leks,"year") == 0) {
116 if (strlen(res)>=sizeof(year)) {
117 debuga(_("Year string too long in squidGuard log file %s"),wentp);
118 exit(1);
119 }
120 strcpy(year,res);
121 } else if(strcmp(leks,"mon") == 0) {
122 if (strlen(res)>=sizeof(mon)) {
123 debuga(_("Month string too long in squidGuard log file %s"),wentp);
124 exit(1);
125 }
126 strcpy(mon,res);
127 } else if(strcmp(leks,"day") == 0) {
128 if (strlen(res)>=sizeof(day)) {
129 debuga(_("Day string too long in squidGuard log file %s"),wentp);
130 exit(1);
131 }
132 strcpy(day,res);
133 } else if(strcmp(leks,"hour") == 0) {
134 if (strlen(res)>=sizeof(hour)) {
135 debuga(_("Hour string too long in squidGuard log file %s"),wentp);
136 exit(1);
137 }
138 strcpy(hour,res);
139 } else if(strcmp(leks,"list") == 0) {
140 if (strlen(res)>=sizeof(list)) {
141 debuga(_("Banning list name too long in squidGuard log file %s"),wentp);
142 exit(1);
143 }
144 strcpy(list,res);
145 } else if(strcmp(leks,"ip") == 0) {
146 if (strlen(res)>=sizeof(ip)) {
147 debuga(_("IP address too long in squidGuard log file %s"),wentp);
148 exit(1);
149 }
150 strcpy(ip,res);
151 } else if(strcmp(leks,"user") == 0) {
152 if (strlen(res)>=sizeof(user)) {
153 debuga(_("User ID too long in squidGuard log file %s"),wentp);
154 exit(1);
155 }
156 strcpy(user,res);
157 } else if(strcmp(leks,"url") == 0) {
158 if (strlen(res)>=sizeof(url)) {
159 debuga(_("URL too long in squidGuard log file %s"),wentp);
160 exit(1);
161 }
162 strcpy(url,res);
163 }
164 }
165 }
166 } else {
167 if (getword(year,sizeof(year),&gwarea,'-')<0 || getword(mon,sizeof(mon),&gwarea,'-')<0 ||
168 getword(day,sizeof(day),&gwarea,' ')<0 || getword(hour,sizeof(hour),&gwarea,' ')<0 ||
169 getword_skip(MAXLEN,&gwarea,'/')<0 || getword(list,sizeof(list),&gwarea,'/')<0 ||
170 getword_skip(MAXLEN,&gwarea,' ')<0 || getword(url,sizeof(url),&gwarea,' ')<0 ||
171 getword(ip,sizeof(ip),&gwarea,'/')<0 || getword_skip(MAXLEN,&gwarea,' ')<0 ||
172 getword(user,sizeof(user),&gwarea,' ')<0) {
173 debuga(_("Maybe you have a broken record or garbage in your %s file."),wentp);
174 exit(1);
175 }
176 /*
177 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
178 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
179 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
180 the URL is not normalized).
181 */
182 str=strchr(url,'/');
183 if (str) {
184 if (str[1]=='/') {
185 str+=2;
186 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
187 url[i]='\0';
188 } else {
189 *str='\0';
190 }
191 }
192 }
193
194 sprintf(warea,"%s%s%s",year,mon,day);
195 sprintf(wdata,"%s%s%s",year,mon,day);
196 idata = atoi(wdata);
197
198 if(SquidguardIgnoreDate) {
199 if(idata < dfrom || idata > duntil)
200 continue;
201 }
202
203 if (strcmp(user,"-") == 0) {
204 strcpy(user,ip);
205 ip[0]='\0';
206 }
207 fprintf(fp_ou,"%s\t%s%s%s\t%s\t%s\t%s\t%s\n",user,year,mon,day,hour,ip,url,list);
208 squidguard_count++;
209 }
210 if (fp_in) fclose(fp_in);
211 return;
212 }
213
214
215 void squidguard_log(void)
216 {
217 FILE *fp_ou = NULL, *fp_guard = NULL;
218 char guard_in[MAXLEN];
219 char guard_ou[MAXLEN];
220 char logdir[MAXLEN];
221 char year[10], day[10], mon[10];
222 char user[MAXLEN];
223 char tmp6[MAXLEN];
224 int y;
225 int cstatus;
226 char *str;
227 char *str2;
228
229 str2 = user;
230
231 if(strlen(SquidGuardConf) < 1 && strlen(SquidGuardLogAlternate) < 1)
232 return;
233
234 if (SquidGuardLogAlternate[0] != '\0')
235 SquidGuardConf[0]='\0';
236
237 sprintf(guard_in,"%s/squidguard.unsort",tmp);
238 sprintf(guard_ou,"%s/squidguard.log",tmp);
239 if((fp_ou=fopen(guard_in,"a"))==NULL) {
240 fprintf(stderr, "SARG: (squidguard) %s: %s\n",text[8],guard_in);
241 exit(1);
242 }
243
244 bzero(day, 3);
245 bzero(mon, 4);
246 bzero(year, 5);
247
248 if(SquidguardIgnoreDate) {
249 if(strcmp(df,"e") == 0) {
250 strncpy(day,period,2);
251 strncpy(mon,period+2,3);
252 strncpy(year,period+5,4);
253 conv_month(mon);
254 sprintf(warea,"%s%s%s",year,mon,day);
255 dfrom=atoi(warea);
256 strncpy(day,period+10,2);
257 strncpy(mon,period+12,3);
258 strncpy(year,period+15,4);
259 conv_month(mon);
260 sprintf(warea,"%s%s%s",year,mon,day);
261 duntil=atoi(warea);
262 } else {
263 strncpy(day,period+7,2);
264 strncpy(mon,period+4,3);
265 strncpy(year,period,4);
266 conv_month(mon);
267 sprintf(warea,"%s%s%s",year,mon,day);
268 dfrom=atoi(warea);
269 strncpy(day,period+17,2);
270 strncpy(mon,period+14,3);
271 strncpy(year,period+10,4);
272 conv_month(mon);
273 sprintf(warea,"%s%s%s",year,mon,day);
274 duntil=atoi(warea);
275 }
276 }
277
278 if(SquidGuardConf[0] != 0) {
279 if(access(SquidGuardConf, R_OK) != 0) {
280 debuga("Cannot open squidGuard config file: %s",SquidGuardConf);
281 exit(1);
282 }
283
284 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
285 fprintf(stderr, "SARG: (squidguard) %s: %s\n",text[8],SquidGuardConf);
286 exit(1);
287 }
288
289 logdir[0]=0;
290 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
291 fixendofline(buf);
292 if((str=get_param_value("logdir",buf))!=NULL) {
293 /*
294 We want to tolerate spaces inside the directory name but we must also
295 remove the trailing spaces left by the editor after the directory name.
296 This should not be a problem as nobody use a file name with trailing spaces.
297 */
298 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
299 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
300 logdir[y+1] = '\0';
301 while (y>=0) {
302 logdir[y] = str[y];
303 y--;
304 }
305 } else if((str=get_param_value("log",buf))!=NULL) {
306 if((str2=get_param_value("anonymous",str))!=NULL)
307 str=str2;
308
309 /*
310 If logdir is defined, we prepend it to the log file name, otherwise, we assume
311 the log directive provides an absolute file name to the log file. Therefore,
312 we don't need to add an additionnal / at the beginning of the log file name.
313 */
314 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
315 /*
316 Spaces are allowed in the name of the log file. The file name ends at the first #
317 because it is assumed it is an end of line comment. Any space before the # is then
318 removed. Any control character (i.e. a character with a code lower than 32) ends
319 the file name. That includes the terminating zero.
320 */
321 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
322 wentp[y++]=*str++;
323 if(*str=='#') {
324 str--;
325 while(*str==' ' && y>0) {
326 str--;
327 y--;
328 }
329 }
330 wentp[y]=0;
331 read_log(wentp,fp_ou);
332 }
333 }
334 } else {
335 sprintf(wentp,"%s",SquidGuardLogAlternate);
336 read_log(wentp,fp_ou);
337 }
338
339 if (fp_guard) fclose(fp_guard);
340 if (fp_ou) fclose(fp_ou);
341
342 if (files_done) {
343 for (y=0; y<nfiles_done; y++)
344 if (files_done[y]) free(files_done[y]);
345 free(files_done);
346 }
347
348 if(debug) {
349 debuga("%s: %s",text[54],guard_ou);
350 }
351
352 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
353 cstatus=system(tmp6);
354 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
355 fprintf(stderr, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus));
356 fprintf(stderr, "SARG: sort command: %s\n",tmp6);
357 exit(1);
358 }
359
360 unlink(guard_in);
361 return;
362 }