]> git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
Accept unlimited URL length in the redirector log (thanks to Joseph L Casale).
[thirdparty/sarg.git] / squidguard_log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Names of the redirector log files already handed to read_log(), kept so
   that a log file referenced several times is parsed only once. The array
   and each name are heap allocated (realloc/strdup) in read_log() and
   released in squidguard_log(). */
30 static char **files_done = NULL;
/* Number of entries currently stored in files_done. */
31 static int nfiles_done = 0;
32
33
34 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
35 {
36 FILE *fp_in = NULL;
37 char *buf;
38 char leks[5], sep[2], res[MAXLEN];
39 char hour[15];
40 char source[128], list[128];
41 char url[MAX_URL_LEN];
42 char user[MAX_USER_LEN];
43 char ip[25];
44 long long int lmon, lday, lyear;
45 int mon, day, year;
46 int idata=0;
47 int i;
48 char *str;
49 bool id_is_ip;
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
52 struct userinfostruct *uinfo;
53 longline line;
54
55 if(debug) {
56 debuga(_("Reading redirector log file %s\n"),wentp);
57 }
58
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log files only one time. Example :
61 dest porn {
62 domainlist porn/domains
63 urllist porn/urls
64 log file1.log
65 }
66 dest aggressive {
67 domainlist aggressive/domains
68 urllist aggressive/urls
69 log file2.log
70 }
71 dest audio-video {
72 domainlist audio-video/domains
73 urllist audio-video/urls
74 log file1.log
75 }
76 */
77 for (i=0; i<nfiles_done; i++)
78 if (!strcmp(wentp, files_done[i])) return;
79
80 nfiles_done++;
81 files_done = realloc(files_done, nfiles_done*sizeof(char *));
82 if (!files_done) {
83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
84 exit(EXIT_FAILURE);
85 }
86 files_done[nfiles_done-1] = strdup(wentp);
87 if (!files_done[nfiles_done-1]) {
88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
89 exit(EXIT_FAILURE);
90 }
91
92 if ((fp_in=fopen(wentp,"r"))==NULL) {
93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
94 exit(EXIT_FAILURE);
95 }
96
97 if ((line=longline_create())==NULL) {
98 debuga(_("Not enough memory to read the redirector log\n"));
99 exit(EXIT_FAILURE);
100 }
101
102 while ((buf=longline_read(fp_in,line)) != NULL) {
103 getword_start(&gwarea,buf);
104 if(RedirectorLogFormat[0] != '\0') {
105 getword_start(&gwarea1,RedirectorLogFormat);
106 leks[0]='\0';
107 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
109 exit(EXIT_FAILURE);
110 }
111 year=0;
112 mon=0;
113 day=0;
114 while(strcmp(leks,"end") != 0) {
115 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
116 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
117 exit(EXIT_FAILURE);
118 }
119 if(strcmp(leks,"end") != 0) {
120 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
121 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
122 exit(EXIT_FAILURE);
123 }
124 if(strcmp(leks,"year") == 0) {
125 year=atoi(res);
126 } else if(strcmp(leks,"mon") == 0) {
127 mon=atoi(res);
128 } else if(strcmp(leks,"day") == 0) {
129 day=atoi(res);
130 } else if(strcmp(leks,"hour") == 0) {
131 if (strlen(res)>=sizeof(hour)) {
132 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
133 exit(EXIT_FAILURE);
134 }
135 strcpy(hour,res);
136 } else if(strcmp(leks,"list") == 0) {
137 if (strlen(res)>=sizeof(list)) {
138 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
139 exit(EXIT_FAILURE);
140 }
141 strcpy(list,res);
142 } else if(strcmp(leks,"ip") == 0) {
143 if (strlen(res)>=sizeof(ip)) {
144 debuga(_("IP address too long in redirector log file %s\n"),wentp);
145 exit(EXIT_FAILURE);
146 }
147 strcpy(ip,res);
148 } else if(strcmp(leks,"user") == 0) {
149 if (strlen(res)>=sizeof(user)) {
150 debuga(_("User ID too long in redirector log file %s\n"),wentp);
151 exit(EXIT_FAILURE);
152 }
153 strcpy(user,res);
154 } else if(strcmp(leks,"url") == 0) {
155 if (strlen(res)>=sizeof(url)) {
156 debuga(_("URL too long in redirector log file %s\n"),wentp);
157 exit(EXIT_FAILURE);
158 }
159 strcpy(url,res);
160 }
161 }
162 }
163 } else {
164 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
165 getword_atoll(&lday,&gwarea,' ')<0) {
166 debuga(_("Invalid date found in file %s\n"),wentp);
167 exit(EXIT_FAILURE);
168 }
169 year=(int)lyear;
170 mon=(int)lmon;
171 day=(int)lday;
172 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
173 debuga(_("Invalid time found in file %s\n"),wentp);
174 exit(EXIT_FAILURE);
175 }
176 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
177 debuga(_("Invalid redirected source in file %s\n"),wentp);
178 exit(EXIT_FAILURE);
179 }
180 if (getword(list,sizeof(list),&gwarea,'/')<0) {
181 debuga(_("Invalid redirected list in file %s\n"),wentp);
182 exit(EXIT_FAILURE);
183 }
184 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(url,sizeof(url),&gwarea,' ')<0) {
185 debuga(_("Invalid URL in file %s\n"),wentp);
186 exit(EXIT_FAILURE);
187 }
188 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
189 debuga(_("Invalid source IP in file %s\n"),wentp);
190 exit(EXIT_FAILURE);
191 }
192 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
193 debuga(_("Invalid user in file %s\n"),wentp);
194 exit(EXIT_FAILURE);
195 }
196 /*
197 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
198 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
199 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
200 the URL is not normalized).
201 */
202 str=strchr(url,'/');
203 if (str) {
204 if (str[1]=='/') {
205 str+=2;
206 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
207 url[i]='\0';
208 } else {
209 *str='\0';
210 }
211 }
212 }
213
214 //sprintf(warea,"%04d%02d%02d",year,mon,day);
215
216 if(RedirectorIgnoreDate) {
217 idata = year*10000+mon*100+day;
218 if(idata < dfrom || idata > duntil)
219 continue;
220 }
221
222 if(UserIp) {
223 strcpy(user,ip);
224 id_is_ip=true;
225 } else {
226 id_is_ip=false;
227 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
228 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
229 strcpy(user,ip);
230 id_is_ip=true;
231 }
232 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
233 continue;
234 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
235 strcpy(user,"everybody");
236 }
237 }
238 uinfo=userinfo_find_from_id(user);
239 if (!uinfo) {
240 uinfo=userinfo_create(user);
241 uinfo->id_is_ip=id_is_ip;
242 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
243 user_find(uinfo->label,MAX_USER_LEN, user);
244 }
245 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo->id,year,mon,day,hour,ip,url,source,list);
246 squidguard_count++;
247 }
248 fclose(fp_in);
249 longline_destroy(&line);
250 return;
251 }
252
253
254 void squidguard_log(void)
255 {
256 FILE *fp_ou = NULL, *fp_guard = NULL;
257 char buf[MAXLEN];
258 char guard_in[MAXLEN];
259 char guard_ou[MAXLEN];
260 char logdir[MAXLEN];
261 char user[MAXLEN];
262 char tmp6[MAXLEN];
263 int i;
264 int y;
265 int cstatus;
266 int dfrom, duntil;
267 char *str;
268 char *str2;
269
270 str2 = user;
271
272 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
273 return;
274
275 sprintf(guard_in,"%s/redirector.unsort",tmp);
276 sprintf(guard_ou,"%s/redirector.log",tmp);
277 if((fp_ou=fopen(guard_in,"a"))==NULL) {
278 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
279 exit(EXIT_FAILURE);
280 }
281
282 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
283 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
284
285 if (NRedirectorLogs>0) {
286 for (i=0 ; i<NRedirectorLogs ; i++)
287 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
288 } else {
289 if(access(SquidGuardConf, R_OK) != 0) {
290 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
291 exit(EXIT_FAILURE);
292 }
293
294 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
295 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
296 exit(EXIT_FAILURE);
297 }
298
299 logdir[0]=0;
300 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
301 fixendofline(buf);
302 if((str=get_param_value("logdir",buf))!=NULL) {
303 /*
304 We want to tolerate spaces inside the directory name but we must also
305 remove the trailing spaces left by the editor after the directory name.
306 This should not be a problem as nobody use a file name with trailing spaces.
307 */
308 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
309 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
310 logdir[y+1] = '\0';
311 while (y>=0) {
312 logdir[y] = str[y];
313 y--;
314 }
315 } else if((str=get_param_value("log",buf))!=NULL) {
316 if((str2=get_param_value("anonymous",str))!=NULL)
317 str=str2;
318
319 /*
320 If logdir is defined, we prepend it to the log file name, otherwise, we assume
321 the log directive provides an absolute file name to the log file. Therefore,
322 we don't need to add an additionnal / at the beginning of the log file name.
323 */
324 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
325 /*
326 Spaces are allowed in the name of the log file. The file name ends at the first #
327 because it is assumed it is an end of line comment. Any space before the # is then
328 removed. Any control character (i.e. a character with a code lower than 32) ends
329 the file name. That includes the terminating zero.
330 */
331 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
332 wentp[y++]=*str++;
333 if(*str=='#') {
334 str--;
335 while(*str==' ' && y>0) {
336 str--;
337 y--;
338 }
339 }
340 wentp[y]=0;
341 read_log(wentp,fp_ou,dfrom,duntil);
342 }
343 }
344 }
345
346 if (fp_guard) fclose(fp_guard);
347 if (fp_ou) fclose(fp_ou);
348
349 if (files_done) {
350 for (y=0; y<nfiles_done; y++)
351 if (files_done[y]) free(files_done[y]);
352 free(files_done);
353 }
354
355 if(debug) {
356 debuga(_("Sorting file: %s\n"),guard_ou);
357 }
358
359 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
360 cstatus=system(tmp6);
361 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
362 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
363 debuga(_("sort command: %s\n"),tmp6);
364 exit(EXIT_FAILURE);
365 }
366
367 unlink(guard_in);
368 return;
369 }