2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
/*
 * Names of the redirector log files already handed to read_log().
 * read_log() compares its file name against every entry with strcmp() and
 * returns immediately on a match; otherwise it grows the array with
 * realloc() and stores a strdup() copy of the name in the last slot.
 * squidguard_log() frees each entry once the logs have been processed.
 */
30 static char **files_done
= NULL
;
/* Number of entries in files_done; bounds every scan of the array and sizes the realloc(). */
31 static int nfiles_done
= 0;
34 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
38 char leks
[5], sep
[2], res
[MAXLEN
];
40 char source
[128], list
[128];
41 char url
[MAX_URL_LEN
];
42 char user
[MAX_USER_LEN
];
44 long long int lmon
, lday
, lyear
;
50 struct getwordstruct gwarea
;
51 struct getwordstruct gwarea1
;
52 struct userinfostruct
*uinfo
;
56 debuga(_("Reading redirector log file %s\n"),wentp
);
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log file only once. Example:
62 domainlist porn/domains
67 domainlist aggressive/domains
68 urllist aggressive/urls
72 domainlist audio-video/domains
73 urllist audio-video/urls
77 for (i
=0; i
<nfiles_done
; i
++)
78 if (!strcmp(wentp
, files_done
[i
])) return;
81 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
86 files_done
[nfiles_done
-1] = strdup(wentp
);
87 if (!files_done
[nfiles_done
-1]) {
88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
92 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp
);
97 if ((line
=longline_create())==NULL
) {
98 debuga(_("Not enough memory to read the redirector log\n"));
102 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
103 getword_start(&gwarea
,buf
);
104 if(RedirectorLogFormat
[0] != '\0') {
105 getword_start(&gwarea1
,RedirectorLogFormat
);
107 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
120 while(strcmp(leks
,"end") != 0) {
121 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0 || getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
122 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
125 if(strcmp(leks
,"end") != 0) {
126 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
127 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
130 if(strcmp(leks
,"year") == 0) {
132 } else if(strcmp(leks
,"mon") == 0) {
134 } else if(strcmp(leks
,"day") == 0) {
136 } else if(strcmp(leks
,"hour") == 0) {
137 if (strlen(res
)>=sizeof(hour
)) {
138 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
142 } else if(strcmp(leks
,"source") == 0) {
143 if (strlen(res
)>=sizeof(source
)) {
144 debuga(_("Banning source name too long in redirector log file %s\n"),wentp
);
148 } else if(strcmp(leks
,"list") == 0) {
149 if (strlen(res
)>=sizeof(list
)) {
150 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
154 } else if(strcmp(leks
,"ip") == 0) {
155 if (strlen(res
)>=sizeof(ip
)) {
156 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
160 } else if(strcmp(leks
,"user") == 0) {
161 if (strlen(res
)>=sizeof(user
)) {
162 debuga(_("User ID too long in redirector log file %s\n"),wentp
);
166 } else if(strcmp(leks
,"url") == 0) {
167 if (strlen(res
)>=sizeof(url
)) {
168 debuga(_("URL too long in redirector log file %s\n"),wentp
);
176 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
177 getword_atoll(&lday
,&gwarea
,' ')<0) {
178 debuga(_("Invalid date found in file %s\n"),wentp
);
184 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
185 debuga(_("Invalid time found in file %s\n"),wentp
);
188 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
189 debuga(_("Invalid redirected source in file %s\n"),wentp
);
192 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
193 debuga(_("Invalid redirected list in file %s\n"),wentp
);
196 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(url
,sizeof(url
),&gwarea
,' ')<0) {
197 debuga(_("Invalid URL in file %s\n"),wentp
);
200 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
201 debuga(_("Invalid source IP in file %s\n"),wentp
);
204 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
205 debuga(_("Invalid user in file %s\n"),wentp
);
209 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
210 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
211 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
212 the URL is not normalized).
218 for (i
=0 ; *str
&& *str
!='/' ; i
++) url
[i
]=*str
++;
226 //sprintf(warea,"%04d%02d%02d",year,mon,day);
228 if(RedirectorFilterOutDate
) {
229 idata
= year
*10000+mon
*100+day
;
230 if(idata
< dfrom
|| idata
> duntil
)
239 if(strcmp(user
,"-") == 0 || strcmp(user
," ") == 0 || strcmp(user
,"") == 0) {
240 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
244 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
246 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
247 strcpy(user
,"everybody");
250 uinfo
=userinfo_find_from_id(user
);
252 uinfo
=userinfo_create(user
);
253 uinfo
->id_is_ip
=id_is_ip
;
254 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
255 user_find(uinfo
->label
,MAX_USER_LEN
, user
);
257 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
);
258 if (source
[0] && list
[0])
259 fprintf(fp_ou
,"%s/%s\n",source
,list
);
261 fprintf(fp_ou
,"%s\n",source
);
263 fprintf(fp_ou
,"%s\n",list
);
267 longline_destroy(&line
);
272 void squidguard_log(void)
274 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
276 char guard_in
[MAXLEN
];
277 char guard_ou
[MAXLEN
];
290 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0)
293 sprintf(guard_in
,"%s/redirector.unsort",tmp
);
294 sprintf(guard_ou
,"%s/redirector.log",tmp
);
295 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
296 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in
);
300 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
301 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
303 if (NRedirectorLogs
>0) {
304 for (i
=0 ; i
<NRedirectorLogs
; i
++)
305 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
307 if(access(SquidGuardConf
, R_OK
) != 0) {
308 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
312 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
313 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf
);
318 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
320 if((str
=get_param_value("logdir",buf
))!=NULL
) {
322 We want to tolerate spaces inside the directory name but we must also
323 remove the trailing spaces left by the editor after the directory name.
324 This should not be a problem as nobody uses a file name with trailing spaces.
326 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
327 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
333 } else if((str
=get_param_value("log",buf
))!=NULL
) {
334 if((str2
=get_param_value("anonymous",str
))!=NULL
)
338 If logdir is defined, we prepend it to the log file name, otherwise, we assume
339 the log directive provides an absolute file name to the log file. Therefore,
340 we don't need to add an additional / at the beginning of the log file name.
342 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
344 Spaces are allowed in the name of the log file. The file name ends at the first #
345 because it is assumed to start an end-of-line comment. Any space before the # is then
346 removed. Any control character (i.e. a character with a code lower than 32) ends
347 the file name. That includes the terminating zero.
349 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
353 while(*str
==' ' && y
>0) {
359 read_log(wentp
,fp_ou
,dfrom
,duntil
);
364 if (fp_guard
) fclose(fp_guard
);
365 if (fp_ou
) fclose(fp_ou
);
368 for (y
=0; y
<nfiles_done
; y
++)
369 if (files_done
[y
]) free(files_done
[y
]);
374 debuga(_("Sorting file: %s\n"),guard_ou
);
377 sprintf(tmp6
,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, guard_ou
);
378 cstatus
=system(tmp6
);
379 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
380 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
381 debuga(_("sort command: %s\n"),tmp6
);