]> git.ipfire.org Git - thirdparty/sarg.git/blame - squidguard_log.c
Convert or split all the input logs
[thirdparty/sarg.git] / squidguard_log.c
CommitLineData
25697a35 1/*
94ff9470 2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
1164c474 3 * 1998, 2010
25697a35
GS
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
1164c474
FM
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
25697a35
GS
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
5f3cfd1d 28#include "include/defs.h"
25697a35 29
d6e703cc
FM
/*
 * Names of the redirector log files already handed to read_log().
 * The array holds nfiles_done heap-allocated strings.
 */
static char **files_done = NULL;
/* Number of valid entries in files_done. */
static int nfiles_done = 0;
32
33
42b117e3 34static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
491b862f
GS
35{
36 FILE *fp_in = NULL;
06b39c87 37 char buf[MAXLEN];
491b862f 38 char leks[5], sep[2], res[MAXLEN];
8ea5d58d 39 char hour[15];
45634030 40 char source[128], list[128];
ac422f9b 41 char url[MAX_URL_LEN];
2240dcea 42 char user[MAX_USER_LEN];
06b39c87 43 char ip[25];
8ea5d58d
FM
44 long long int lmon, lday, lyear;
45 int mon, day, year;
2240dcea 46 int idata=0;
d6e703cc 47 int i;
06e3cc62 48 char *str;
987e8375 49 bool id_is_ip;
9c7c6346
FM
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
987e8375 52 struct userinfostruct *uinfo;
491b862f
GS
53
54 if(debug) {
1f482a8d 55 debuga(_("Reading redirector log file %s\n"),wentp);
491b862f 56 }
d6e703cc
FM
57
58 /* With squidGuard, you can log groups in only one log file.
59 We must parse each log files only one time. Example :
60 dest porn {
61 domainlist porn/domains
62 urllist porn/urls
63 log file1.log
64 }
65 dest aggressive {
66 domainlist aggressive/domains
67 urllist aggressive/urls
68 log file2.log
69 }
70 dest audio-video {
71 domainlist audio-video/domains
72 urllist audio-video/urls
73 log file1.log
74 }
75 */
76 for (i=0; i<nfiles_done; i++)
77 if (!strcmp(wentp, files_done[i])) return;
06e3cc62 78
d6e703cc
FM
79 nfiles_done++;
80 files_done = realloc(files_done, nfiles_done*sizeof(char *));
81 if (!files_done) {
1f482a8d 82 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
f76230ca 83 exit(EXIT_FAILURE);
d6e703cc
FM
84 }
85 files_done[nfiles_done-1] = strdup(wentp);
86 if (!files_done[nfiles_done-1]) {
1f482a8d 87 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
f76230ca 88 exit(EXIT_FAILURE);
d6e703cc
FM
89 }
90
491b862f 91 if ((fp_in=fopen(wentp,"r"))==NULL) {
f76230ca 92 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
06b39c87 93 exit(EXIT_FAILURE);
491b862f 94 }
06e3cc62 95
491b862f 96 while (fgets(buf,sizeof(buf),fp_in) != NULL) {
9c7c6346 97 getword_start(&gwarea,buf);
1f482a8d
FM
98 if(RedirectorLogFormat[0] != '\0') {
99 getword_start(&gwarea1,RedirectorLogFormat);
491b862f 100 leks[0]='\0';
9c7c6346 101 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
10210234 102 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
06b39c87 103 exit(EXIT_FAILURE);
4bcb77cf 104 }
8ea5d58d
FM
105 year=0;
106 mon=0;
107 day=0;
491b862f 108 while(strcmp(leks,"end") != 0) {
9c7c6346 109 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
10210234 110 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
06b39c87 111 exit(EXIT_FAILURE);
4bcb77cf 112 }
491b862f 113 if(strcmp(leks,"end") != 0) {
9c7c6346 114 if (getword(res,sizeof(res),&gwarea,sep[0])<0) {
10210234 115 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
06b39c87 116 exit(EXIT_FAILURE);
4bcb77cf 117 }
2240dcea
FM
118 if(strcmp(leks,"year") == 0) {
119 if (strlen(res)>=sizeof(year)) {
1f482a8d 120 debuga(_("Year string too long in redirector log file %s\n"),wentp);
06b39c87 121 exit(EXIT_FAILURE);
2240dcea 122 }
8ea5d58d 123 year=atoi(res);
2240dcea
FM
124 } else if(strcmp(leks,"mon") == 0) {
125 if (strlen(res)>=sizeof(mon)) {
1f482a8d 126 debuga(_("Month string too long in redirector log file %s\n"),wentp);
06b39c87 127 exit(EXIT_FAILURE);
2240dcea 128 }
8ea5d58d 129 mon=atoi(res);
2240dcea
FM
130 } else if(strcmp(leks,"day") == 0) {
131 if (strlen(res)>=sizeof(day)) {
1f482a8d 132 debuga(_("Day string too long in redirector log file %s\n"),wentp);
06b39c87 133 exit(EXIT_FAILURE);
2240dcea 134 }
8ea5d58d 135 day=atoi(res);
2240dcea
FM
136 } else if(strcmp(leks,"hour") == 0) {
137 if (strlen(res)>=sizeof(hour)) {
1f482a8d 138 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
06b39c87 139 exit(EXIT_FAILURE);
2240dcea 140 }
8ea5d58d
FM
141 strncpy(hour,res,sizeof(hour)-1);
142 hour[sizeof(hour)-1]='\0';
2240dcea
FM
143 } else if(strcmp(leks,"list") == 0) {
144 if (strlen(res)>=sizeof(list)) {
1f482a8d 145 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
06b39c87 146 exit(EXIT_FAILURE);
2240dcea 147 }
491b862f 148 strcpy(list,res);
2240dcea
FM
149 } else if(strcmp(leks,"ip") == 0) {
150 if (strlen(res)>=sizeof(ip)) {
1f482a8d 151 debuga(_("IP address too long in redirector log file %s\n"),wentp);
06b39c87 152 exit(EXIT_FAILURE);
2240dcea 153 }
491b862f 154 strcpy(ip,res);
2240dcea
FM
155 } else if(strcmp(leks,"user") == 0) {
156 if (strlen(res)>=sizeof(user)) {
1f482a8d 157 debuga(_("User ID too long in redirector log file %s\n"),wentp);
06b39c87 158 exit(EXIT_FAILURE);
2240dcea 159 }
491b862f 160 strcpy(user,res);
2240dcea
FM
161 } else if(strcmp(leks,"url") == 0) {
162 if (strlen(res)>=sizeof(url)) {
1f482a8d 163 debuga(_("URL too long in redirector log file %s\n"),wentp);
06b39c87 164 exit(EXIT_FAILURE);
2240dcea 165 }
491b862f 166 strcpy(url,res);
2240dcea 167 }
491b862f
GS
168 }
169 }
170 } else {
8ea5d58d
FM
171 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
172 getword_atoll(&lday,&gwarea,' ')<0 || getword(hour,sizeof(hour),&gwarea,' ')<0 ||
45634030
FM
173 getword_skip(MAXLEN,&gwarea,'(')<0 ||
174 getword(source,sizeof(source),&gwarea,'/')<0 || getword(list,sizeof(list),&gwarea,'/')<0 ||
c11e2033 175 getword_skip(MAXLEN,&gwarea,' ')<0 || getword(url,sizeof(url),&gwarea,' ')<0 ||
9c7c6346
FM
176 getword(ip,sizeof(ip),&gwarea,'/')<0 || getword_skip(MAXLEN,&gwarea,' ')<0 ||
177 getword(user,sizeof(user),&gwarea,' ')<0) {
10210234 178 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
06b39c87 179 exit(EXIT_FAILURE);
4bcb77cf 180 }
8ea5d58d
FM
181 year=(int)lyear;
182 mon=(int)lmon;
183 day=(int)lday;
c11e2033
FM
184 /*
185 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
186 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
187 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
188 the URL is not normalized).
189 */
06e3cc62 190 str=strchr(url,'/');
c11e2033
FM
191 if (str) {
192 if (str[1]=='/') {
193 str+=2;
194 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
195 url[i]='\0';
196 } else {
197 *str='\0';
198 }
199 }
491b862f
GS
200 }
201
8ea5d58d 202 //sprintf(warea,"%04d%02d%02d",year,mon,day);
491b862f 203
69fa7b48 204 if(RedirectorIgnoreDate) {
8ea5d58d 205 idata = year*10000+mon*100+day;
28e2bf65 206 if(idata < dfrom || idata > duntil)
491b862f
GS
207 continue;
208 }
209
987e8375 210 if(UserIp) {
491b862f 211 strcpy(user,ip);
987e8375
FM
212 id_is_ip=true;
213 } else {
214 id_is_ip=false;
215 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
216 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
217 strcpy(user,ip);
218 id_is_ip=true;
219 }
220 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
221 continue;
222 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
223 strcpy(user,"everybody");
224 }
225 }
226 uinfo=userinfo_find_from_id(user);
227 if (!uinfo) {
228 uinfo=userinfo_create(user);
229 uinfo->id_is_ip=id_is_ip;
230 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
231 user_find(uinfo->label,MAX_USER_LEN, user);
491b862f 232 }
8ea5d58d 233 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo->id,year,mon,day,hour,ip,url,source,list);
491b862f
GS
234 squidguard_count++;
235 }
236 if (fp_in) fclose(fp_in);
237 return;
238}
239
240
32e71fa4 241void squidguard_log(void)
25697a35 242{
491b862f 243 FILE *fp_ou = NULL, *fp_guard = NULL;
06b39c87 244 char buf[MAXLEN];
25697a35
GS
245 char guard_in[MAXLEN];
246 char guard_ou[MAXLEN];
247 char logdir[MAXLEN];
25697a35 248 char user[MAXLEN];
2240dcea 249 char tmp6[MAXLEN];
1f482a8d 250 int i;
5f3cfd1d 251 int y;
456d78a5 252 int cstatus;
42b117e3 253 int dfrom, duntil;
5f3cfd1d
FM
254 char *str;
255 char *str2;
25697a35 256
d6e703cc
FM
257 str2 = user;
258
1f482a8d 259 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
25697a35
GS
260 return;
261
987e8375
FM
262 sprintf(guard_in,"%s/redirector.unsort",tmp);
263 sprintf(guard_ou,"%s/redirector.log",tmp);
25697a35 264 if((fp_ou=fopen(guard_in,"a"))==NULL) {
f76230ca 265 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
06b39c87 266 exit(EXIT_FAILURE);
25697a35
GS
267 }
268
42b117e3
FM
269 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
270 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
25697a35 271
1f482a8d
FM
272 if (NRedirectorLogs>0) {
273 for (i=0 ; i<NRedirectorLogs ; i++)
274 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
275 } else {
491b862f 276 if(access(SquidGuardConf, R_OK) != 0) {
10210234 277 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
06b39c87 278 exit(EXIT_FAILURE);
491b862f 279 }
25697a35 280
491b862f 281 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
f76230ca 282 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
06b39c87 283 exit(EXIT_FAILURE);
491b862f 284 }
5f3cfd1d
FM
285
286 logdir[0]=0;
491b862f 287 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
9c7c6346 288 fixendofline(buf);
5f3cfd1d 289 if((str=get_param_value("logdir",buf))!=NULL) {
9c7c6346
FM
290 /*
291 We want to tolerate spaces inside the directory name but we must also
292 remove the trailing spaces left by the editor after the directory name.
293 This should not be a problem as nobody use a file name with trailing spaces.
294 */
295 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
296 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
297 logdir[y+1] = '\0';
298 while (y>=0) {
299 logdir[y] = str[y];
300 y--;
4bcb77cf 301 }
5f3cfd1d
FM
302 } else if((str=get_param_value("log",buf))!=NULL) {
303 if((str2=get_param_value("anonymous",str))!=NULL)
304 str=str2;
d6e703cc 305
5f3cfd1d
FM
306 /*
307 If logdir is defined, we prepend it to the log file name, otherwise, we assume
308 the log directive provides an absolute file name to the log file. Therefore,
309 we don't need to add an additionnal / at the beginning of the log file name.
310 */
311 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
312 /*
313 Spaces are allowed in the name of the log file. The file name ends at the first #
314 because it is assumed it is an end of line comment. Any space before the # is then
315 removed. Any control character (i.e. a character with a code lower than 32) ends
316 the file name. That includes the terminating zero.
317 */
318 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
319 wentp[y++]=*str++;
320 if(*str=='#') {
321 str--;
322 while(*str==' ' && y>0) {
323 str--;
324 y--;
4bcb77cf 325 }
d6e703cc 326 }
5f3cfd1d 327 wentp[y]=0;
42b117e3 328 read_log(wentp,fp_ou,dfrom,duntil);
25697a35 329 }
25697a35 330 }
491b862f
GS
331 }
332
333 if (fp_guard) fclose(fp_guard);
334 if (fp_ou) fclose(fp_ou);
335
c274f011
FM
336 if (files_done) {
337 for (y=0; y<nfiles_done; y++)
338 if (files_done[y]) free(files_done[y]);
339 free(files_done);
340 }
341
491b862f 342 if(debug) {
10210234 343 debuga(_("Sorting file: %s\n"),guard_ou);
25697a35
GS
344 }
345
9a2efbd0 346 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
456d78a5
FM
347 cstatus=system(tmp6);
348 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
f76230ca
FM
349 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
350 debuga(_("sort command: %s\n"),tmp6);
06b39c87 351 exit(EXIT_FAILURE);
456d78a5 352 }
491b862f 353
25697a35
GS
354 unlink(guard_in);
355 return;
356}