]>
Commit | Line | Data |
---|---|---|
25697a35 | 1 | /* |
94ff9470 | 2 | * SARG Squid Analysis Report Generator http://sarg.sourceforge.net |
1164c474 | 3 | * 1998, 2010 |
25697a35 GS |
4 | * |
5 | * SARG donations: | |
6 | * please look at http://sarg.sourceforge.net/donations.php | |
1164c474 FM |
7 | * Support: |
8 | * http://sourceforge.net/projects/sarg/forums/forum/363374 | |
25697a35 GS |
9 | * --------------------------------------------------------------------- |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
24 | * | |
25 | */ | |
26 | ||
27 | #include "include/conf.h" | |
5f3cfd1d | 28 | #include "include/defs.h" |
25697a35 | 29 | |
d6e703cc FM |
30 | static char **files_done = NULL; |
31 | static int nfiles_done = 0; | |
32 | ||
33 | ||
42b117e3 | 34 | static void read_log(const char *wentp, FILE *fp_ou,int dfrom,int duntil) |
491b862f GS |
35 | { |
36 | FILE *fp_in = NULL; | |
2d4c92a1 | 37 | char *buf; |
491b862f | 38 | char leks[5], sep[2], res[MAXLEN]; |
8ea5d58d | 39 | char hour[15]; |
45634030 | 40 | char source[128], list[128]; |
ac422f9b | 41 | char url[MAX_URL_LEN]; |
2240dcea | 42 | char user[MAX_USER_LEN]; |
06b39c87 | 43 | char ip[25]; |
8ea5d58d FM |
44 | long long int lmon, lday, lyear; |
45 | int mon, day, year; | |
2240dcea | 46 | int idata=0; |
d6e703cc | 47 | int i; |
06e3cc62 | 48 | char *str; |
987e8375 | 49 | bool id_is_ip; |
9c7c6346 FM |
50 | struct getwordstruct gwarea; |
51 | struct getwordstruct gwarea1; | |
987e8375 | 52 | struct userinfostruct *uinfo; |
2d4c92a1 | 53 | longline line; |
491b862f GS |
54 | |
55 | if(debug) { | |
1f482a8d | 56 | debuga(_("Reading redirector log file %s\n"),wentp); |
491b862f | 57 | } |
d6e703cc FM |
58 | |
59 | /* With squidGuard, you can log groups in only one log file. | |
60 | We must parse each log files only one time. Example : | |
61 | dest porn { | |
62 | domainlist porn/domains | |
63 | urllist porn/urls | |
64 | log file1.log | |
65 | } | |
66 | dest aggressive { | |
67 | domainlist aggressive/domains | |
68 | urllist aggressive/urls | |
69 | log file2.log | |
70 | } | |
71 | dest audio-video { | |
72 | domainlist audio-video/domains | |
73 | urllist audio-video/urls | |
74 | log file1.log | |
75 | } | |
76 | */ | |
77 | for (i=0; i<nfiles_done; i++) | |
78 | if (!strcmp(wentp, files_done[i])) return; | |
06e3cc62 | 79 | |
d6e703cc FM |
80 | nfiles_done++; |
81 | files_done = realloc(files_done, nfiles_done*sizeof(char *)); | |
82 | if (!files_done) { | |
1f482a8d | 83 | debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno)); |
f76230ca | 84 | exit(EXIT_FAILURE); |
d6e703cc FM |
85 | } |
86 | files_done[nfiles_done-1] = strdup(wentp); | |
87 | if (!files_done[nfiles_done-1]) { | |
1f482a8d | 88 | debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno)); |
f76230ca | 89 | exit(EXIT_FAILURE); |
d6e703cc FM |
90 | } |
91 | ||
491b862f | 92 | if ((fp_in=fopen(wentp,"r"))==NULL) { |
f76230ca | 93 | debuga(_("(squidguard) Cannot open log file %s\n"),wentp); |
06b39c87 | 94 | exit(EXIT_FAILURE); |
491b862f | 95 | } |
06e3cc62 | 96 | |
2d4c92a1 FM |
97 | if ((line=longline_create())==NULL) { |
98 | debuga(_("Not enough memory to read the redirector log\n")); | |
99 | exit(EXIT_FAILURE); | |
100 | } | |
101 | ||
102 | while ((buf=longline_read(fp_in,line)) != NULL) { | |
9c7c6346 | 103 | getword_start(&gwarea,buf); |
1f482a8d FM |
104 | if(RedirectorLogFormat[0] != '\0') { |
105 | getword_start(&gwarea1,RedirectorLogFormat); | |
491b862f | 106 | leks[0]='\0'; |
9c7c6346 | 107 | if (getword(leks,sizeof(leks),&gwarea1,'#')<0) { |
10210234 | 108 | debuga(_("There is a broken record or garbage in your %s file\n"),wentp); |
06b39c87 | 109 | exit(EXIT_FAILURE); |
4bcb77cf | 110 | } |
8ea5d58d FM |
111 | year=0; |
112 | mon=0; | |
113 | day=0; | |
491b862f | 114 | while(strcmp(leks,"end") != 0) { |
9c7c6346 | 115 | if (getword(leks,sizeof(leks),&gwarea1,'#')<0 || getword(sep,sizeof(sep),&gwarea1,'#')<0) { |
10210234 | 116 | debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp); |
06b39c87 | 117 | exit(EXIT_FAILURE); |
4bcb77cf | 118 | } |
491b862f | 119 | if(strcmp(leks,"end") != 0) { |
2d4c92a1 | 120 | if (getword_limit(res,sizeof(res),&gwarea,sep[0])<0) { |
10210234 | 121 | debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp); |
06b39c87 | 122 | exit(EXIT_FAILURE); |
4bcb77cf | 123 | } |
2240dcea | 124 | if(strcmp(leks,"year") == 0) { |
8ea5d58d | 125 | year=atoi(res); |
2240dcea | 126 | } else if(strcmp(leks,"mon") == 0) { |
8ea5d58d | 127 | mon=atoi(res); |
2240dcea | 128 | } else if(strcmp(leks,"day") == 0) { |
8ea5d58d | 129 | day=atoi(res); |
2240dcea FM |
130 | } else if(strcmp(leks,"hour") == 0) { |
131 | if (strlen(res)>=sizeof(hour)) { | |
1f482a8d | 132 | debuga(_("Hour string too long in redirector log file %s\n"),wentp); |
06b39c87 | 133 | exit(EXIT_FAILURE); |
2240dcea | 134 | } |
c2b35d69 | 135 | strcpy(hour,res); |
2240dcea FM |
136 | } else if(strcmp(leks,"list") == 0) { |
137 | if (strlen(res)>=sizeof(list)) { | |
1f482a8d | 138 | debuga(_("Banning list name too long in redirector log file %s\n"),wentp); |
06b39c87 | 139 | exit(EXIT_FAILURE); |
2240dcea | 140 | } |
491b862f | 141 | strcpy(list,res); |
2240dcea FM |
142 | } else if(strcmp(leks,"ip") == 0) { |
143 | if (strlen(res)>=sizeof(ip)) { | |
1f482a8d | 144 | debuga(_("IP address too long in redirector log file %s\n"),wentp); |
06b39c87 | 145 | exit(EXIT_FAILURE); |
2240dcea | 146 | } |
491b862f | 147 | strcpy(ip,res); |
2240dcea FM |
148 | } else if(strcmp(leks,"user") == 0) { |
149 | if (strlen(res)>=sizeof(user)) { | |
1f482a8d | 150 | debuga(_("User ID too long in redirector log file %s\n"),wentp); |
06b39c87 | 151 | exit(EXIT_FAILURE); |
2240dcea | 152 | } |
491b862f | 153 | strcpy(user,res); |
2240dcea FM |
154 | } else if(strcmp(leks,"url") == 0) { |
155 | if (strlen(res)>=sizeof(url)) { | |
1f482a8d | 156 | debuga(_("URL too long in redirector log file %s\n"),wentp); |
06b39c87 | 157 | exit(EXIT_FAILURE); |
2240dcea | 158 | } |
491b862f | 159 | strcpy(url,res); |
2240dcea | 160 | } |
491b862f GS |
161 | } |
162 | } | |
163 | } else { | |
8ea5d58d | 164 | if (getword_atoll(&lyear,&gwarea,'-')<0 || getword_atoll(&lmon,&gwarea,'-')<0 || |
2d4c92a1 FM |
165 | getword_atoll(&lday,&gwarea,' ')<0) { |
166 | debuga(_("Invalid date found in file %s\n"),wentp); | |
06b39c87 | 167 | exit(EXIT_FAILURE); |
4bcb77cf | 168 | } |
8ea5d58d FM |
169 | year=(int)lyear; |
170 | mon=(int)lmon; | |
171 | day=(int)lday; | |
2d4c92a1 FM |
172 | if (getword(hour,sizeof(hour),&gwarea,' ')<0) { |
173 | debuga(_("Invalid time found in file %s\n"),wentp); | |
174 | exit(EXIT_FAILURE); | |
175 | } | |
176 | if (getword_skip(MAXLEN,&gwarea,'(')<0 || getword(source,sizeof(source),&gwarea,'/')<0) { | |
177 | debuga(_("Invalid redirected source in file %s\n"),wentp); | |
178 | exit(EXIT_FAILURE); | |
179 | } | |
180 | if (getword(list,sizeof(list),&gwarea,'/')<0) { | |
181 | debuga(_("Invalid redirected list in file %s\n"),wentp); | |
182 | exit(EXIT_FAILURE); | |
183 | } | |
184 | if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword_limit(url,sizeof(url),&gwarea,' ')<0) { | |
185 | debuga(_("Invalid URL in file %s\n"),wentp); | |
186 | exit(EXIT_FAILURE); | |
187 | } | |
188 | if (getword(ip,sizeof(ip),&gwarea,'/')<0) { | |
189 | debuga(_("Invalid source IP in file %s\n"),wentp); | |
190 | exit(EXIT_FAILURE); | |
191 | } | |
192 | if (getword_skip(MAXLEN,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) { | |
193 | debuga(_("Invalid user in file %s\n"),wentp); | |
194 | exit(EXIT_FAILURE); | |
195 | } | |
c11e2033 FM |
196 | /* |
197 | The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT. | |
198 | The following code removes the protocol:// if it is detected and always truncates the URL after the domain name. | |
199 | It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e. | |
200 | the URL is not normalized). | |
201 | */ | |
06e3cc62 | 202 | str=strchr(url,'/'); |
c11e2033 FM |
203 | if (str) { |
204 | if (str[1]=='/') { | |
205 | str+=2; | |
206 | for (i=0 ; *str && *str!='/' ; i++) url[i]=*str++; | |
207 | url[i]='\0'; | |
208 | } else { | |
209 | *str='\0'; | |
210 | } | |
211 | } | |
491b862f GS |
212 | } |
213 | ||
8ea5d58d | 214 | //sprintf(warea,"%04d%02d%02d",year,mon,day); |
491b862f | 215 | |
69fa7b48 | 216 | if(RedirectorIgnoreDate) { |
8ea5d58d | 217 | idata = year*10000+mon*100+day; |
28e2bf65 | 218 | if(idata < dfrom || idata > duntil) |
491b862f GS |
219 | continue; |
220 | } | |
221 | ||
987e8375 | 222 | if(UserIp) { |
491b862f | 223 | strcpy(user,ip); |
987e8375 FM |
224 | id_is_ip=true; |
225 | } else { | |
226 | id_is_ip=false; | |
227 | if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) { | |
228 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) { | |
229 | strcpy(user,ip); | |
230 | id_is_ip=true; | |
231 | } | |
232 | if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) | |
233 | continue; | |
234 | if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) | |
235 | strcpy(user,"everybody"); | |
236 | } | |
237 | } | |
238 | uinfo=userinfo_find_from_id(user); | |
239 | if (!uinfo) { | |
240 | uinfo=userinfo_create(user); | |
241 | uinfo->id_is_ip=id_is_ip; | |
242 | if(Ip2Name && id_is_ip) ip2name(user,sizeof(user)); | |
243 | user_find(uinfo->label,MAX_USER_LEN, user); | |
491b862f | 244 | } |
8ea5d58d | 245 | fprintf(fp_ou,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo->id,year,mon,day,hour,ip,url,source,list); |
491b862f GS |
246 | squidguard_count++; |
247 | } | |
2d4c92a1 FM |
248 | fclose(fp_in); |
249 | longline_destroy(&line); | |
491b862f GS |
250 | return; |
251 | } | |
252 | ||
253 | ||
32e71fa4 | 254 | void squidguard_log(void) |
25697a35 | 255 | { |
491b862f | 256 | FILE *fp_ou = NULL, *fp_guard = NULL; |
06b39c87 | 257 | char buf[MAXLEN]; |
25697a35 GS |
258 | char guard_in[MAXLEN]; |
259 | char guard_ou[MAXLEN]; | |
260 | char logdir[MAXLEN]; | |
25697a35 | 261 | char user[MAXLEN]; |
2240dcea | 262 | char tmp6[MAXLEN]; |
1f482a8d | 263 | int i; |
5f3cfd1d | 264 | int y; |
456d78a5 | 265 | int cstatus; |
42b117e3 | 266 | int dfrom, duntil; |
5f3cfd1d FM |
267 | char *str; |
268 | char *str2; | |
25697a35 | 269 | |
d6e703cc FM |
270 | str2 = user; |
271 | ||
1f482a8d | 272 | if(SquidGuardConf[0] == '\0' && NRedirectorLogs == 0) |
25697a35 GS |
273 | return; |
274 | ||
987e8375 FM |
275 | sprintf(guard_in,"%s/redirector.unsort",tmp); |
276 | sprintf(guard_ou,"%s/redirector.log",tmp); | |
25697a35 | 277 | if((fp_ou=fopen(guard_in,"a"))==NULL) { |
f76230ca | 278 | debuga(_("(squidguard) Cannot open log file %s\n"),guard_in); |
06b39c87 | 279 | exit(EXIT_FAILURE); |
25697a35 GS |
280 | } |
281 | ||
42b117e3 FM |
282 | dfrom=(period.start.tm_year+1900)*10000+(period.start.tm_mon+1)*100+period.start.tm_mday; |
283 | duntil=(period.end.tm_year+1900)*10000+(period.end.tm_mon+1)*100+period.end.tm_mday; | |
25697a35 | 284 | |
1f482a8d FM |
285 | if (NRedirectorLogs>0) { |
286 | for (i=0 ; i<NRedirectorLogs ; i++) | |
287 | read_log(RedirectorLogs[i],fp_ou,dfrom,duntil); | |
288 | } else { | |
491b862f | 289 | if(access(SquidGuardConf, R_OK) != 0) { |
10210234 | 290 | debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf); |
06b39c87 | 291 | exit(EXIT_FAILURE); |
491b862f | 292 | } |
25697a35 | 293 | |
491b862f | 294 | if((fp_guard=fopen(SquidGuardConf,"r"))==NULL) { |
f76230ca | 295 | debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf); |
06b39c87 | 296 | exit(EXIT_FAILURE); |
491b862f | 297 | } |
5f3cfd1d FM |
298 | |
299 | logdir[0]=0; | |
491b862f | 300 | while(fgets(buf,sizeof(buf),fp_guard)!=NULL) { |
9c7c6346 | 301 | fixendofline(buf); |
5f3cfd1d | 302 | if((str=get_param_value("logdir",buf))!=NULL) { |
9c7c6346 FM |
303 | /* |
304 | We want to tolerate spaces inside the directory name but we must also | |
305 | remove the trailing spaces left by the editor after the directory name. | |
306 | This should not be a problem as nobody use a file name with trailing spaces. | |
307 | */ | |
308 | for (y=strlen(str)-1 ; y>=0 && (unsigned char)str[y]<=' ' ; y--); | |
309 | if (y>=sizeof(logdir)-1) y=sizeof(logdir)-2; | |
310 | logdir[y+1] = '\0'; | |
311 | while (y>=0) { | |
312 | logdir[y] = str[y]; | |
313 | y--; | |
4bcb77cf | 314 | } |
5f3cfd1d FM |
315 | } else if((str=get_param_value("log",buf))!=NULL) { |
316 | if((str2=get_param_value("anonymous",str))!=NULL) | |
317 | str=str2; | |
d6e703cc | 318 | |
5f3cfd1d FM |
319 | /* |
320 | If logdir is defined, we prepend it to the log file name, otherwise, we assume | |
321 | the log directive provides an absolute file name to the log file. Therefore, | |
322 | we don't need to add an additionnal / at the beginning of the log file name. | |
323 | */ | |
324 | y=(logdir[0]) ? sprintf(wentp,"%s/",logdir) : 0; | |
325 | /* | |
326 | Spaces are allowed in the name of the log file. The file name ends at the first # | |
327 | because it is assumed it is an end of line comment. Any space before the # is then | |
328 | removed. Any control character (i.e. a character with a code lower than 32) ends | |
329 | the file name. That includes the terminating zero. | |
330 | */ | |
331 | while((unsigned char)*str>=' ' && *str!='#' && y<sizeof(wentp)-1) | |
332 | wentp[y++]=*str++; | |
333 | if(*str=='#') { | |
334 | str--; | |
335 | while(*str==' ' && y>0) { | |
336 | str--; | |
337 | y--; | |
4bcb77cf | 338 | } |
d6e703cc | 339 | } |
5f3cfd1d | 340 | wentp[y]=0; |
42b117e3 | 341 | read_log(wentp,fp_ou,dfrom,duntil); |
25697a35 | 342 | } |
25697a35 | 343 | } |
491b862f GS |
344 | } |
345 | ||
346 | if (fp_guard) fclose(fp_guard); | |
347 | if (fp_ou) fclose(fp_ou); | |
348 | ||
c274f011 FM |
349 | if (files_done) { |
350 | for (y=0; y<nfiles_done; y++) | |
351 | if (files_done[y]) free(files_done[y]); | |
352 | free(files_done); | |
353 | } | |
354 | ||
491b862f | 355 | if(debug) { |
10210234 | 356 | debuga(_("Sorting file: %s\n"),guard_ou); |
25697a35 GS |
357 | } |
358 | ||
9a2efbd0 | 359 | sprintf(tmp6,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in, guard_ou); |
456d78a5 FM |
360 | cstatus=system(tmp6); |
361 | if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { | |
f76230ca FM |
362 | debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus)); |
363 | debuga(_("sort command: %s\n"),tmp6); | |
06b39c87 | 364 | exit(EXIT_FAILURE); |
456d78a5 | 365 | } |
491b862f | 366 | |
25697a35 GS |
367 | unlink(guard_in); |
368 | return; | |
369 | } |