]> git.ipfire.org Git - thirdparty/sarg.git/blob - squidguard_log.c
fbf80ff5227743cb52f5435aa377fe32dd8ac242
[thirdparty/sarg.git] / squidguard_log.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2010
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
/* Names of the redirector log files already passed to read_log(); used to parse each file only once. */
static char **files_done = NULL;
/* Number of entries currently stored in files_done. */
static int nfiles_done = 0;
32
33
34 static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil)
35 {
36 FILE *fp_in = NULL;
37 char buf[MAXLEN];
38 char leks[5], sep[2], res[MAXLEN];
39 char hour[15];
40 char source[128], list[128];
41 char url[MAX_URL_LEN];
42 char user[MAX_USER_LEN];
43 char ip[25];
44 long long int lmon, lday, lyear;
45 int mon, day, year;
46 int idata=0;
47 int i;
48 char *str;
49 bool id_is_ip;
50 struct getwordstruct gwarea;
51 struct getwordstruct gwarea1;
52 struct userinfostruct *uinfo;
53
54 if(debug) {
55 debuga(_("Reading redirector log file %s\n"),wentp);
56 }
57
58 /* With squidGuard, you can log groups in only one log file.
59 We must parse each log files only one time. Example :
60 dest porn {
61 domainlist porn/domains
62 urllist porn/urls
63 log file1.log
64 }
65 dest aggressive {
66 domainlist aggressive/domains
67 urllist aggressive/urls
68 log file2.log
69 }
70 dest audio-video {
71 domainlist audio-video/domains
72 urllist audio-video/urls
73 log file1.log
74 }
75 */
76 for (i=0; i<nfiles_done; i++)
77 if (!strcmp(wentp, files_done[i])) return;
78
79 nfiles_done++;
80 files_done = realloc(files_done, nfiles_done*sizeof(char *));
81 if (!files_done) {
82 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
83 exit(EXIT_FAILURE);
84 }
85 files_done[nfiles_done-1] = strdup(wentp);
86 if (!files_done[nfiles_done-1]) {
87 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno));
88 exit(EXIT_FAILURE);
89 }
90
91 if ((fp_in=fopen(wentp,"r"))==NULL) {
92 debuga(_("(squidguard) Cannot open log file %s\n"),wentp);
93 exit(EXIT_FAILURE);
94 }
95
96 while (fgets(buf,sizeof(buf),fp_in) != NULL) {
97 getword_start(&gwarea,buf);
98 if(RedirectorLogFormat[0] != '\0') {
99 getword_start(&gwarea1,RedirectorLogFormat);
100 leks[0]='\0';
101 if (getword(leks,sizeof(leks),&gwarea1,'#')<0) {
102 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
103 exit(EXIT_FAILURE);
104 }
105 year=0;
106 mon=0;
107 day=0;
108 while(strcmp(leks,"end") != 0) {
109 if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) {
110 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
111 exit(EXIT_FAILURE);
112 }
113 if(strcmp(leks,"end") != 0) {
114 if (getword(res,sizeof(res),&gwarea,sep[0])<0) {
115 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp);
116 exit(EXIT_FAILURE);
117 }
118 if(strcmp(leks,"year") == 0) {
119 year=atoi(res);
120 } else if(strcmp(leks,"mon") == 0) {
121 mon=atoi(res);
122 } else if(strcmp(leks,"day") == 0) {
123 day=atoi(res);
124 } else if(strcmp(leks,"hour") == 0) {
125 if (strlen(res)>=sizeof(hour)) {
126 debuga(_("Hour string too long in redirector log file %s\n"),wentp);
127 exit(EXIT_FAILURE);
128 }
129 strcpy(hour,res);
130 } else if(strcmp(leks,"list") == 0) {
131 if (strlen(res)>=sizeof(list)) {
132 debuga(_("Banning list name too long in redirector log file %s\n"),wentp);
133 exit(EXIT_FAILURE);
134 }
135 strcpy(list,res);
136 } else if(strcmp(leks,"ip") == 0) {
137 if (strlen(res)>=sizeof(ip)) {
138 debuga(_("IP address too long in redirector log file %s\n"),wentp);
139 exit(EXIT_FAILURE);
140 }
141 strcpy(ip,res);
142 } else if(strcmp(leks,"user") == 0) {
143 if (strlen(res)>=sizeof(user)) {
144 debuga(_("User ID too long in redirector log file %s\n"),wentp);
145 exit(EXIT_FAILURE);
146 }
147 strcpy(user,res);
148 } else if(strcmp(leks,"url") == 0) {
149 if (strlen(res)>=sizeof(url)) {
150 debuga(_("URL too long in redirector log file %s\n"),wentp);
151 exit(EXIT_FAILURE);
152 }
153 strcpy(url,res);
154 }
155 }
156 }
157 } else {
158 if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 ||
159 getword_atoll(&lday,&gwarea,' ')<0 || getword(hour,sizeof(hour),&gwarea,' ')<0 ||
160 getword_skip(MAXLEN,&gwarea,'(')<0 ||
161 getword(source,sizeof(source),&gwarea,'/')<0 || getword(list,sizeof(list),&gwarea,'/')<0 ||
162 getword_skip(MAXLEN,&gwarea,' ')<0 || getword(url,sizeof(url),&gwarea,' ')<0 ||
163 getword(ip,sizeof(ip),&gwarea,'/')<0 || getword_skip(MAXLEN,&gwarea,' ')<0 ||
164 getword(user,sizeof(user),&gwarea,' ')<0) {
165 debuga(_("There is a broken record or garbage in your %s file\n"),wentp);
166 exit(EXIT_FAILURE);
167 }
168 year=(int)lyear;
169 mon=(int)lmon;
170 day=(int)lday;
171 /*
172 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
173 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
174 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
175 the URL is not normalized).
176 */
177 str=strchr(url,'/');
178 if (str) {
179 if (str[1]=='/') {
180 str+=2;
181 for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++;
182 url[i]='\0';
183 } else {
184 *str='\0';
185 }
186 }
187 }
188
189 //sprintf(warea,"%04d%02d%02d",year,mon,day);
190
191 if(RedirectorIgnoreDate) {
192 idata = year*10000+mon*100+day;
193 if(idata < dfrom || idata > duntil)
194 continue;
195 }
196
197 if(UserIp) {
198 strcpy(user,ip);
199 id_is_ip=true;
200 } else {
201 id_is_ip=false;
202 if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) {
203 if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) {
204 strcpy(user,ip);
205 id_is_ip=true;
206 }
207 if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE)
208 continue;
209 if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY)
210 strcpy(user,"everybody");
211 }
212 }
213 uinfo=userinfo_find_from_id(user);
214 if (!uinfo) {
215 uinfo=userinfo_create(user);
216 uinfo->id_is_ip=id_is_ip;
217 if(Ip2Name && id_is_ip) ip2name(user,sizeof(user));
218 user_find(uinfo->label,MAX_USER_LEN, user);
219 }
220 fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo->id,year,mon,day,hour,ip,url,source,list);
221 squidguard_count++;
222 }
223 if (fp_in) fclose(fp_in);
224 return;
225 }
226
227
228 void squidguard_log(void)
229 {
230 FILE *fp_ou = NULL, *fp_guard = NULL;
231 char buf[MAXLEN];
232 char guard_in[MAXLEN];
233 char guard_ou[MAXLEN];
234 char logdir[MAXLEN];
235 char user[MAXLEN];
236 char tmp6[MAXLEN];
237 int i;
238 int y;
239 int cstatus;
240 int dfrom, duntil;
241 char *str;
242 char *str2;
243
244 str2 = user;
245
246 if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0)
247 return;
248
249 sprintf(guard_in,"%s/redirector.unsort",tmp);
250 sprintf(guard_ou,"%s/redirector.log",tmp);
251 if((fp_ou=fopen(guard_in,"a"))==NULL) {
252 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in);
253 exit(EXIT_FAILURE);
254 }
255
256 dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday;
257 duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday;
258
259 if (NRedirectorLogs>0) {
260 for (i=0 ; i<NRedirectorLogs ; i++)
261 read_log(RedirectorLogs[i],fp_ou,dfrom,duntil);
262 } else {
263 if(access(SquidGuardConf, R_OK) != 0) {
264 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf);
265 exit(EXIT_FAILURE);
266 }
267
268 if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) {
269 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf);
270 exit(EXIT_FAILURE);
271 }
272
273 logdir[0]=0;
274 while(fgets(buf,sizeof(buf),fp_guard)!=NULL) {
275 fixendofline(buf);
276 if((str=get_param_value("logdir",buf))!=NULL) {
277 /*
278 We want to tolerate spaces inside the directory name but we must also
279 remove the trailing spaces left by the editor after the directory name.
280 This should not be a problem as nobody use a file name with trailing spaces.
281 */
282 for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--);
283 if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2;
284 logdir[y+1] = '\0';
285 while (y>=0) {
286 logdir[y] = str[y];
287 y--;
288 }
289 } else if((str=get_param_value("log",buf))!=NULL) {
290 if((str2=get_param_value("anonymous",str))!=NULL)
291 str=str2;
292
293 /*
294 If logdir is defined, we prepend it to the log file name, otherwise, we assume
295 the log directive provides an absolute file name to the log file. Therefore,
296 we don't need to add an additionnal / at the beginning of the log file name.
297 */
298 y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0;
299 /*
300 Spaces are allowed in the name of the log file. The file name ends at the first #
301 because it is assumed it is an end of line comment. Any space before the # is then
302 removed. Any control character (i.e. a character with a code lower than 32) ends
303 the file name. That includes the terminating zero.
304 */
305 while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1)
306 wentp[y++]=*str++;
307 if(*str=='#') {
308 str--;
309 while(*str==' ' && y>0) {
310 str--;
311 y--;
312 }
313 }
314 wentp[y]=0;
315 read_log(wentp,fp_ou,dfrom,duntil);
316 }
317 }
318 }
319
320 if (fp_guard) fclose(fp_guard);
321 if (fp_ou) fclose(fp_ou);
322
323 if (files_done) {
324 for (y=0; y<nfiles_done; y++)
325 if (files_done[y]) free(files_done[y]);
326 free(files_done);
327 }
328
329 if(debug) {
330 debuga(_("Sorting file: %s\n"),guard_ou);
331 }
332
333 sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou);
334 cstatus=system(tmp6);
335 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
336 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
337 debuga(_("sort command: %s\n"),tmp6);
338 exit(EXIT_FAILURE);
339 }
340
341 unlink(guard_in);
342 return;
343 }