2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
/*
 * Names of the redirector log files that read_log() has already been asked
 * to parse. Each entry is a strdup()'ed copy of the file name; read_log()
 * returns early when the requested name is already in the list, so a log
 * file is never parsed twice. The entries are freed by squidguard_log().
 */
static char **files_done = NULL;

/* Number of entries currently stored in files_done. */
static int nfiles_done = 0;
34 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
38 char leks
[5], sep
[2], res
[MAXLEN
];
40 char source
[128], list
[128];
41 char url
[MAX_URL_LEN
];
42 char user
[MAX_USER_LEN
];
44 long long int lmon
, lday
, lyear
;
50 struct getwordstruct gwarea
;
51 struct getwordstruct gwarea1
;
52 struct userinfostruct
*uinfo
;
55 debuga(_("Reading redirector log file %s\n"),wentp
);
58 /* With squidGuard, several groups can log into the same log file.
59 We must therefore parse each log file only once. Example:
61 domainlist porn/domains
66 domainlist aggressive/domains
67 urllist aggressive/urls
71 domainlist audio-video/domains
72 urllist audio-video/urls
76 for (i
=0; i
<nfiles_done
; i
++)
77 if (!strcmp(wentp
, files_done
[i
])) return;
80 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
82 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
85 files_done
[nfiles_done
-1] = strdup(wentp
);
86 if (!files_done
[nfiles_done
-1]) {
87 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
91 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
92 debuga(_("(squidguard) Cannot open log file %s\n"),wentp
);
96 while (fgets(buf
,sizeof(buf
),fp_in
) != NULL
) {
97 getword_start(&gwarea
,buf
);
98 if(RedirectorLogFormat
[0] != '\0') {
99 getword_start(&gwarea1
,RedirectorLogFormat
);
101 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
102 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
108 while(strcmp(leks
,"end") != 0) {
109 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0 || getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
110 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
113 if(strcmp(leks
,"end") != 0) {
114 if (getword(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
115 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
118 if(strcmp(leks
,"year") == 0) {
120 } else if(strcmp(leks
,"mon") == 0) {
122 } else if(strcmp(leks
,"day") == 0) {
124 } else if(strcmp(leks
,"hour") == 0) {
125 if (strlen(res
)>=sizeof(hour
)) {
126 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
130 } else if(strcmp(leks
,"list") == 0) {
131 if (strlen(res
)>=sizeof(list
)) {
132 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
136 } else if(strcmp(leks
,"ip") == 0) {
137 if (strlen(res
)>=sizeof(ip
)) {
138 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
142 } else if(strcmp(leks
,"user") == 0) {
143 if (strlen(res
)>=sizeof(user
)) {
144 debuga(_("User ID too long in redirector log file %s\n"),wentp
);
148 } else if(strcmp(leks
,"url") == 0) {
149 if (strlen(res
)>=sizeof(url
)) {
150 debuga(_("URL too long in redirector log file %s\n"),wentp
);
158 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
159 getword_atoll(&lday
,&gwarea
,' ')<0 || getword(hour
,sizeof(hour
),&gwarea
,' ')<0 ||
160 getword_skip(MAXLEN
,&gwarea
,'(')<0 ||
161 getword(source
,sizeof(source
),&gwarea
,'/')<0 || getword(list
,sizeof(list
),&gwarea
,'/')<0 ||
162 getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(url
,sizeof(url
),&gwarea
,' ')<0 ||
163 getword(ip
,sizeof(ip
),&gwarea
,'/')<0 || getword_skip(MAXLEN
,&gwarea
,' ')<0 ||
164 getword(user
,sizeof(user
),&gwarea
,' ')<0) {
165 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
172 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
173 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
174 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
175 the URL is not normalized).
181 for (i
=0 ; *str
&& *str
!='/' ; i
++) url
[i
]=*str
++;
189 //sprintf(warea,"%04d%02d%02d",year,mon,day);
191 if(RedirectorIgnoreDate
) {
192 idata
= year
*10000+mon
*100+day
;
193 if(idata
< dfrom
|| idata
> duntil
)
202 if(strcmp(user
,"-") == 0 || strcmp(user
," ") == 0 || strcmp(user
,"") == 0) {
203 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
207 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
209 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
210 strcpy(user
,"everybody");
213 uinfo
=userinfo_find_from_id(user
);
215 uinfo
=userinfo_create(user
);
216 uinfo
->id_is_ip
=id_is_ip
;
217 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
218 user_find(uinfo
->label
,MAX_USER_LEN
, user
);
220 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
,source
,list
);
223 if (fp_in
) fclose(fp_in
);
228 void squidguard_log(void)
230 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
232 char guard_in
[MAXLEN
];
233 char guard_ou
[MAXLEN
];
246 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0)
249 sprintf(guard_in
,"%s/redirector.unsort",tmp
);
250 sprintf(guard_ou
,"%s/redirector.log",tmp
);
251 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
252 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in
);
256 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
257 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
259 if (NRedirectorLogs
>0) {
260 for (i
=0 ; i
<NRedirectorLogs
; i
++)
261 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
263 if(access(SquidGuardConf
, R_OK
) != 0) {
264 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
268 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
269 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf
);
274 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
276 if((str
=get_param_value("logdir",buf
))!=NULL
) {
278 We want to tolerate spaces inside the directory name but we must also
279 remove the trailing spaces left by the editor after the directory name.
280 This should not be a problem as nobody uses a file name with trailing spaces.
282 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
283 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
289 } else if((str
=get_param_value("log",buf
))!=NULL
) {
290 if((str2
=get_param_value("anonymous",str
))!=NULL
)
294 If logdir is defined, we prepend it to the log file name, otherwise, we assume
295 the log directive provides an absolute file name to the log file. Therefore,
296 we don't need to add an additional / at the beginning of the log file name.
298 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
300 Spaces are allowed in the name of the log file. The file name ends at the first #
301 because it is assumed it is an end of line comment. Any space before the # is then
302 removed. Any control character (i.e. a character with a code lower than 32) ends
303 the file name. That includes the terminating zero.
305 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
309 while(*str
==' ' && y
>0) {
315 read_log(wentp
,fp_ou
,dfrom
,duntil
);
320 if (fp_guard
) fclose(fp_guard
);
321 if (fp_ou
) fclose(fp_ou
);
324 for (y
=0; y
<nfiles_done
; y
++)
325 if (files_done
[y
]) free(files_done
[y
]);
330 debuga(_("Sorting file: %s\n"),guard_ou
);
333 sprintf(tmp6
,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, guard_ou
);
334 cstatus
=system(tmp6
);
335 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
336 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
337 debuga(_("sort command: %s\n"),tmp6
);