]> git.ipfire.org Git - thirdparty/sarg.git/blame - squidguard_log.c
Accept unlimited URL length in the redirector log (thanks to Joseph L Casale).
[thirdparty/sarg.git] / squidguard_log.c
CommitLineData
25697a35 1/*
94ff9470 2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
1164c474 3 * 1998, 2010
25697a35
GS
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
1164c474
FM
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
25697a35
GS
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
5f3cfd1d 28#include "include/defs.h"
25697a35 29
d6e703cc
FM
/* Names of the redirector log files already handed to read_log(); used to parse each file only once. */
static char **files_done;
/* Number of entries currently stored in files_done. */
static int nfiles_done;
32
33
42b117e3 34static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
491b862f
GS
35{
36 FILE *fp_in = NULL;
2d4c92a1 37 char *buf;
491b862f 38 char leks[5], sep[2], res[MAXLEN];
8ea5d58d 39 char hour[15];
45634030 40 char source[128], list[128];
ac422f9b 41 char url[MAX_URL_LEN];
2240dcea 42 char user[MAX_USER_LEN];
06b39c87 43 char ip[25];
8ea5d58d
FM
44 long long int lmon, lday, lyear;
45 int mon, day, year;
2240dcea 46 int idata=0;
d6e703cc 47 int i;
06e3cc62 48 char *str;
987e8375 49 bool id_is_ip;
9c7c6346
FM
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
987e8375 52 struct userinfostruct *uinfo;
2d4c92a1 53 longline line;
491b862f
GS
54
55 if(debug) {
1f482a8d 56 debuga(_("Reading redirector log file %s\n"),wentp);
491b862f 57 }
d6e703cc
FM
58
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log files only one time. Example :
61 dest porn {
62 domainlist porn/domains
63 urllist porn/urls
64 log file1.log
65 }
66 dest aggressive {
67 domainlist aggressive/domains
68 urllist aggressive/urls
69 log file2.log
70 }
71 dest audio-video {
72 domainlist audio-video/domains
73 urllist audio-video/urls
74 log file1.log
75 }
76 */
77 for (i=0; i<nfiles_done; i++)
78 if (!strcmp(wentp, files_done[i])) return;
06e3cc62 79
d6e703cc
FM
80 nfiles_done++;
81 files_done = realloc(files_done, nfiles_done*sizeof(char *));
82 if (!files_done) {
1f482a8d 83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
f76230ca 84 exit(EXIT_FAILURE);
d6e703cc
FM
85 }
86 files_done[nfiles_done-1] = strdup(wentp);
87 if (!files_done[nfiles_done-1]) {
1f482a8d 88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
f76230ca 89 exit(EXIT_FAILURE);
d6e703cc
FM
90 }
91
491b862f 92 if ((fp_in=fopen(wentp,"r"))==NULL) {
f76230ca 93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
06b39c87 94 exit(EXIT_FAILURE);
491b862f 95 }
06e3cc62 96
2d4c92a1
FM
97 if ((line=longline_create())==NULL) {
98 debuga(_("Not enough memory to read the redirector log\n"));
99 exit(EXIT_FAILURE);
100 }
101
102 while ((buf=longline_read(fp_in,line)) != NULL) {
9c7c6346 103 getword_start(&gwarea,buf);
1f482a8d
FM
104 if(RedirectorLogFormat[0] != '\0') {
105 getword_start(&gwarea1,RedirectorLogFormat);
491b862f 106 leks[0]='\0';
9c7c6346 107 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
10210234 108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
06b39c87 109 exit(EXIT_FAILURE);
4bcb77cf 110 }
8ea5d58d
FM
111 year=0;
112 mon=0;
113 day=0;
491b862f 114 while(strcmp(leks,"end") != 0) {
9c7c6346 115 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
10210234 116 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
06b39c87 117 exit(EXIT_FAILURE);
4bcb77cf 118 }
491b862f 119 if(strcmp(leks,"end") != 0) {
2d4c92a1 120 if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) {
10210234 121 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
06b39c87 122 exit(EXIT_FAILURE);
4bcb77cf 123 }
2240dcea 124 if(strcmp(leks,"year") == 0) {
8ea5d58d 125 year=atoi(res);
2240dcea 126 } else if(strcmp(leks,"mon") == 0) {
8ea5d58d 127 mon=atoi(res);
2240dcea 128 } else if(strcmp(leks,"day") == 0) {
8ea5d58d 129 day=atoi(res);
2240dcea
FM
130 } else if(strcmp(leks,"hour") == 0) {
131 if (strlen(res)>=sizeof(hour)) {
1f482a8d 132 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
06b39c87 133 exit(EXIT_FAILURE);
2240dcea 134 }
c2b35d69 135 strcpy(hour,res);
2240dcea
FM
136 } else if(strcmp(leks,"list") == 0) {
137 if (strlen(res)>=sizeof(list)) {
1f482a8d 138 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
06b39c87 139 exit(EXIT_FAILURE);
2240dcea 140 }
491b862f 141 strcpy(list,res);
2240dcea
FM
142 } else if(strcmp(leks,"ip") == 0) {
143 if (strlen(res)>=sizeof(ip)) {
1f482a8d 144 debuga(_("IP address too long in redirector log file %s\n"),wentp);
06b39c87 145 exit(EXIT_FAILURE);
2240dcea 146 }
491b862f 147 strcpy(ip,res);
2240dcea
FM
148 } else if(strcmp(leks,"user") == 0) {
149 if (strlen(res)>=sizeof(user)) {
1f482a8d 150 debuga(_("User ID too long in redirector log file %s\n"),wentp);
06b39c87 151 exit(EXIT_FAILURE);
2240dcea 152 }
491b862f 153 strcpy(user,res);
2240dcea
FM
154 } else if(strcmp(leks,"url") == 0) {
155 if (strlen(res)>=sizeof(url)) {
1f482a8d 156 debuga(_("URL too long in redirector log file %s\n"),wentp);
06b39c87 157 exit(EXIT_FAILURE);
2240dcea 158 }
491b862f 159 strcpy(url,res);
2240dcea 160 }
491b862f
GS
161 }
162 }
163 } else {
8ea5d58d 164 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
2d4c92a1
FM
165 getword_atoll(&lday,&gwarea,' ')<0) {
166 debuga(_("Invalid date found in file %s\n"),wentp);
06b39c87 167 exit(EXIT_FAILURE);
4bcb77cf 168 }
8ea5d58d
FM
169 year=(int)lyear;
170 mon=(int)lmon;
171 day=(int)lday;
2d4c92a1
FM
172 if (getword(hour,sizeof(hour),&gwarea,' ')<0) {
173 debuga(_("Invalid time found in file %s\n"),wentp);
174 exit(EXIT_FAILURE);
175 }
176 if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) {
177 debuga(_("Invalid redirected source in file %s\n"),wentp);
178 exit(EXIT_FAILURE);
179 }
180 if (getword(list,sizeof(list),&gwarea,'/')<0) {
181 debuga(_("Invalid redirected list in file %s\n"),wentp);
182 exit(EXIT_FAILURE);
183 }
184 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(url,sizeof(url),&gwarea,' ')<0) {
185 debuga(_("Invalid URL in file %s\n"),wentp);
186 exit(EXIT_FAILURE);
187 }
188 if (getword(ip,sizeof(ip),&gwarea,'/')<0) {
189 debuga(_("Invalid source IP in file %s\n"),wentp);
190 exit(EXIT_FAILURE);
191 }
192 if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) {
193 debuga(_("Invalid user in file %s\n"),wentp);
194 exit(EXIT_FAILURE);
195 }
c11e2033
FM
196 /*
197 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
198 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
199 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
200 the URL is not normalized).
201 */
06e3cc62 202 str=strchr(url,'/');
c11e2033
FM
203 if (str) {
204 if (str[1]=='/') {
205 str+=2;
206 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
207 url[i]='\0';
208 } else {
209 *str='\0';
210 }
211 }
491b862f
GS
212 }
213
8ea5d58d 214 //sprintf(warea,"%04d%02d%02d",year,mon,day);
491b862f 215
69fa7b48 216 if(RedirectorIgnoreDate) {
8ea5d58d 217 idata = year*10000+mon*100+day;
28e2bf65 218 if(idata < dfrom || idata > duntil)
491b862f
GS
219 continue;
220 }
221
987e8375 222 if(UserIp) {
491b862f 223 strcpy(user,ip);
987e8375
FM
224 id_is_ip=true;
225 } else {
226 id_is_ip=false;
227 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
228 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
229 strcpy(user,ip);
230 id_is_ip=true;
231 }
232 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
233 continue;
234 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
235 strcpy(user,"everybody");
236 }
237 }
238 uinfo=userinfo_find_from_id(user);
239 if (!uinfo) {
240 uinfo=userinfo_create(user);
241 uinfo->id_is_ip=id_is_ip;
242 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
243 user_find(uinfo->label,MAX_USER_LEN, user);
491b862f 244 }
8ea5d58d 245 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo->id,year,mon,day,hour,ip,url,source,list);
491b862f
GS
246 squidguard_count++;
247 }
2d4c92a1
FM
248 fclose(fp_in);
249 longline_destroy(&line);
491b862f
GS
250 return;
251}
252
253
32e71fa4 254void squidguard_log(void)
25697a35 255{
491b862f 256 FILE *fp_ou = NULL, *fp_guard = NULL;
06b39c87 257 char buf[MAXLEN];
25697a35
GS
258 char guard_in[MAXLEN];
259 char guard_ou[MAXLEN];
260 char logdir[MAXLEN];
25697a35 261 char user[MAXLEN];
2240dcea 262 char tmp6[MAXLEN];
1f482a8d 263 int i;
5f3cfd1d 264 int y;
456d78a5 265 int cstatus;
42b117e3 266 int dfrom, duntil;
5f3cfd1d
FM
267 char *str;
268 char *str2;
25697a35 269
d6e703cc
FM
270 str2 = user;
271
1f482a8d 272 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
25697a35
GS
273 return;
274
987e8375
FM
275 sprintf(guard_in,"%s/redirector.unsort",tmp);
276 sprintf(guard_ou,"%s/redirector.log",tmp);
25697a35 277 if((fp_ou=fopen(guard_in,"a"))==NULL) {
f76230ca 278 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
06b39c87 279 exit(EXIT_FAILURE);
25697a35
GS
280 }
281
42b117e3
FM
282 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
283 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
25697a35 284
1f482a8d
FM
285 if (NRedirectorLogs>0) {
286 for (i=0 ; i<NRedirectorLogs ; i++)
287 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
288 } else {
491b862f 289 if(access(SquidGuardConf, R_OK) != 0) {
10210234 290 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
06b39c87 291 exit(EXIT_FAILURE);
491b862f 292 }
25697a35 293
491b862f 294 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
f76230ca 295 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
06b39c87 296 exit(EXIT_FAILURE);
491b862f 297 }
5f3cfd1d
FM
298
299 logdir[0]=0;
491b862f 300 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
9c7c6346 301 fixendofline(buf);
5f3cfd1d 302 if((str=get_param_value("logdir",buf))!=NULL) {
9c7c6346
FM
303 /*
304 We want to tolerate spaces inside the directory name but we must also
305 remove the trailing spaces left by the editor after the directory name.
306 This should not be a problem as nobody use a file name with trailing spaces.
307 */
308 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
309 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
310 logdir[y+1] = '\0';
311 while (y>=0) {
312 logdir[y] = str[y];
313 y--;
4bcb77cf 314 }
5f3cfd1d
FM
315 } else if((str=get_param_value("log",buf))!=NULL) {
316 if((str2=get_param_value("anonymous",str))!=NULL)
317 str=str2;
d6e703cc 318
5f3cfd1d
FM
319 /*
320 If logdir is defined, we prepend it to the log file name, otherwise, we assume
321 the log directive provides an absolute file name to the log file. Therefore,
322 we don't need to add an additionnal / at the beginning of the log file name.
323 */
324 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
325 /*
326 Spaces are allowed in the name of the log file. The file name ends at the first #
327 because it is assumed it is an end of line comment. Any space before the # is then
328 removed. Any control character (i.e. a character with a code lower than 32) ends
329 the file name. That includes the terminating zero.
330 */
331 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
332 wentp[y++]=*str++;
333 if(*str=='#') {
334 str--;
335 while(*str==' ' && y>0) {
336 str--;
337 y--;
4bcb77cf 338 }
d6e703cc 339 }
5f3cfd1d 340 wentp[y]=0;
42b117e3 341 read_log(wentp,fp_ou,dfrom,duntil);
25697a35 342 }
25697a35 343 }
491b862f
GS
344 }
345
346 if (fp_guard) fclose(fp_guard);
347 if (fp_ou) fclose(fp_ou);
348
c274f011
FM
349 if (files_done) {
350 for (y=0; y<nfiles_done; y++)
351 if (files_done[y]) free(files_done[y]);
352 free(files_done);
353 }
354
491b862f 355 if(debug) {
10210234 356 debuga(_("Sorting file: %s\n"),guard_ou);
25697a35
GS
357 }
358
9a2efbd0 359 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
456d78a5
FM
360 cstatus=system(tmp6);
361 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
f76230ca
FM
362 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
363 debuga(_("sort command: %s\n"),tmp6);
06b39c87 364 exit(EXIT_FAILURE);
456d78a5 365 }
491b862f 366
25697a35
GS
367 unlink(guard_in);
368 return;
369}