2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
/*
 * Names of the redirector log files that read_log() has already processed.
 * read_log() compares every new file name against this list and returns
 * immediately on a match, so a log file is parsed only once.
 */
static char **files_done = NULL;

/* Number of entries currently stored in files_done. */
static int nfiles_done = 0;
/*
 * Read one redirector (squidGuard) log file and write one record per
 * retained entry to fp_ou.
 *
 * wentp  : name of the log file to read. If the name is already present in
 *          files_done the function returns at once; otherwise a copy of the
 *          name (strdup) is stored in files_done before the file is opened.
 * fp_ou  : output stream. Each record is written as tab separated fields:
 *          user id, date formatted as YYYYMMDD, hour, ip, url and
 *          "source/list".
 * dfrom  : lower bound compared with year*10000+mon*100+day.
 * duntil : upper bound of the same comparison. The comparison is only made
 *          when RedirectorIgnoreDate is set.
 *
 * Parsing of a line depends on RedirectorLogFormat:
 *  - not empty: tokens are read from it with '#' as delimiter, a field name
 *    (year, mon, day, hour, list, ip, user, url) followed by a one character
 *    separator, until the name "end" is found. The separator is used to cut
 *    the matching value out of the log line.
 *  - empty: the line is split as year-mon-day, hour, source/list found after
 *    a '(', the URL, the IP address and the user.
 *
 * An user equal to "-", " " or "" is handled according to RecordsWithoutUser.
 *
 * NOTE(review): this listing has lost lines (braces, several local
 * declarations and the statements that follow each error message), so the
 * action taken on errors and on out of range dates is not visible here.
 * Confirm against the complete file.
 */
34 static void read_log(const char *wentp
, FILE *fp_ou
,int dfrom
,int duntil
)
38 char leks
[5], sep
[2], res
[MAXLEN
];
40 char source
[128], list
[128];
41 char url
[MAX_URL_LEN
];
42 char user
[MAX_USER_LEN
];
44 long long int lmon
, lday
, lyear
;
50 struct getwordstruct gwarea
;
51 struct getwordstruct gwarea1
;
52 struct userinfostruct
*uinfo
;
56 debuga(_("Reading redirector log file %s\n"),wentp
);
59 /* With squidGuard, you can log groups in only one log file.
60 We must parse each log files only one time. Example :
62 domainlist porn/domains
67 domainlist aggressive/domains
68 urllist aggressive/urls
72 domainlist audio-video/domains
73 urllist audio-video/urls
77 for (i
=0; i
<nfiles_done
; i
++)
78 if (!strcmp(wentp
, files_done
[i
])) return;
81 files_done
= realloc(files_done
, nfiles_done
*sizeof(char *));
83 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
86 files_done
[nfiles_done
-1] = strdup(wentp
);
87 if (!files_done
[nfiles_done
-1]) {
88 debuga(_("Not enough memory to store the name of the new redirector log to be read - %s\n"),strerror(errno
));
92 if ((fp_in
=fopen(wentp
,"r"))==NULL
) {
93 debuga(_("(squidguard) Cannot open log file %s\n"),wentp
);
97 if ((line
=longline_create())==NULL
) {
98 debuga(_("Not enough memory to read the redirector log\n"));
102 while ((buf
=longline_read(fp_in
,line
)) != NULL
) {
103 getword_start(&gwarea
,buf
);
104 if(RedirectorLogFormat
[0] != '\0') {
105 getword_start(&gwarea1
,RedirectorLogFormat
);
107 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0) {
108 debuga(_("There is a broken record or garbage in your %s file\n"),wentp
);
114 while(strcmp(leks
,"end") != 0) {
115 if (getword(leks
,sizeof(leks
),&gwarea1
,'#')<0 || getword(sep
,sizeof(sep
),&gwarea1
,'#')<0) {
116 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
119 if(strcmp(leks
,"end") != 0) {
120 if (getword_limit(res
,sizeof(res
),&gwarea
,sep
[0])<0) {
121 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),wentp
);
124 if(strcmp(leks
,"year") == 0) {
126 } else if(strcmp(leks
,"mon") == 0) {
128 } else if(strcmp(leks
,"day") == 0) {
130 } else if(strcmp(leks
,"hour") == 0) {
131 if (strlen(res
)>=sizeof(hour
)) {
132 debuga(_("Hour string too long in redirector log file %s\n"),wentp
);
136 } else if(strcmp(leks
,"list") == 0) {
137 if (strlen(res
)>=sizeof(list
)) {
138 debuga(_("Banning list name too long in redirector log file %s\n"),wentp
);
142 } else if(strcmp(leks
,"ip") == 0) {
143 if (strlen(res
)>=sizeof(ip
)) {
144 debuga(_("IP address too long in redirector log file %s\n"),wentp
);
148 } else if(strcmp(leks
,"user") == 0) {
149 if (strlen(res
)>=sizeof(user
)) {
150 debuga(_("User ID too long in redirector log file %s\n"),wentp
);
154 } else if(strcmp(leks
,"url") == 0) {
155 if (strlen(res
)>=sizeof(url
)) {
156 debuga(_("URL too long in redirector log file %s\n"),wentp
);
164 if (getword_atoll(&lyear
,&gwarea
,'-')<0 || getword_atoll(&lmon
,&gwarea
,'-')<0 ||
165 getword_atoll(&lday
,&gwarea
,' ')<0) {
166 debuga(_("Invalid date found in file %s\n"),wentp
);
172 if (getword(hour
,sizeof(hour
),&gwarea
,' ')<0) {
173 debuga(_("Invalid time found in file %s\n"),wentp
);
176 if (getword_skip(MAXLEN
,&gwarea
,'(')<0 || getword(source
,sizeof(source
),&gwarea
,'/')<0) {
177 debuga(_("Invalid redirected source in file %s\n"),wentp
);
180 if (getword(list
,sizeof(list
),&gwarea
,'/')<0) {
181 debuga(_("Invalid redirected list in file %s\n"),wentp
);
184 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword_limit(url
,sizeof(url
),&gwarea
,' ')<0) {
185 debuga(_("Invalid URL in file %s\n"),wentp
);
188 if (getword(ip
,sizeof(ip
),&gwarea
,'/')<0) {
189 debuga(_("Invalid source IP in file %s\n"),wentp
);
192 if (getword_skip(MAXLEN
,&gwarea
,' ')<0 || getword(user
,sizeof(user
),&gwarea
,' ')<0) {
193 debuga(_("Invalid user in file %s\n"),wentp
);
197 The URL may be "http://url:port/data" if the method is GET or simply "url:port/" if the method is CONNECT.
198 The following code removes the protocol:// if it is detected and always truncates the URL after the domain name.
199 It will fail if the URL doesn't start with the protocol and contains two consecutive / in the path (i.e.
200 the URL is not normalized).
206 for (i
=0 ; *str
&& *str
!='/' ; i
++) url
[i
]=*str
++;
214 //sprintf(warea,"%04d%02d%02d",year,mon,day);
216 if(RedirectorIgnoreDate
) {
217 idata
= year
*10000+mon
*100+day
;
218 if(idata
< dfrom
|| idata
> duntil
)
227 if(strcmp(user
,"-") == 0 || strcmp(user
," ") == 0 || strcmp(user
,"") == 0) {
228 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IP
) {
232 if(RecordsWithoutUser
== RECORDWITHOUTUSER_IGNORE
)
234 if(RecordsWithoutUser
== RECORDWITHOUTUSER_EVERYBODY
)
235 strcpy(user
,"everybody");
238 uinfo
=userinfo_find_from_id(user
);
240 uinfo
=userinfo_create(user
);
241 uinfo
->id_is_ip
=id_is_ip
;
242 if(Ip2Name
&& id_is_ip
) ip2name(user
,sizeof(user
));
243 user_find(uinfo
->label
,MAX_USER_LEN
, user
);
245 fprintf(fp_ou
,"%s\t%04d%02d%02d\t%s\t%s\t%s\t%s/%s\n",uinfo
->id
,year
,mon
,day
,hour
,ip
,url
,source
,list
);
249 longline_destroy(&line
);
254 void squidguard_log(void)
256 FILE *fp_ou
= NULL
, *fp_guard
= NULL
;
258 char guard_in
[MAXLEN
];
259 char guard_ou
[MAXLEN
];
272 if(SquidGuardConf
[0] == '\0' && NRedirectorLogs
== 0)
275 sprintf(guard_in
,"%s/redirector.unsort",tmp
);
276 sprintf(guard_ou
,"%s/redirector.log",tmp
);
277 if((fp_ou
=fopen(guard_in
,"a"))==NULL
) {
278 debuga(_("(squidguard) Cannot open log file %s\n"),guard_in
);
282 dfrom
=(period
.start
.tm_year
+1900)*10000+(period
.start
.tm_mon
+1)*100+period
.start
.tm_mday
;
283 duntil
=(period
.end
.tm_year
+1900)*10000+(period
.end
.tm_mon
+1)*100+period
.end
.tm_mday
;
285 if (NRedirectorLogs
>0) {
286 for (i
=0 ; i
<NRedirectorLogs
; i
++)
287 read_log(RedirectorLogs
[i
],fp_ou
,dfrom
,duntil
);
289 if(access(SquidGuardConf
, R_OK
) != 0) {
290 debuga(_("Cannot open squidGuard config file: %s\n"),SquidGuardConf
);
294 if((fp_guard
=fopen(SquidGuardConf
,"r"))==NULL
) {
295 debuga(_("(squidguard) Cannot open log file %s\n"),SquidGuardConf
);
300 while(fgets(buf
,sizeof(buf
),fp_guard
)!=NULL
) {
302 if((str
=get_param_value("logdir",buf
))!=NULL
) {
304 We want to tolerate spaces inside the directory name but we must also
305 remove the trailing spaces left by the editor after the directory name.
306 This should not be a problem as nobody uses a file name with trailing spaces.
308 for (y
=strlen(str
)-1 ; y
>=0 && (unsigned char)str
[y
]<=' ' ; y
--);
309 if (y
>=sizeof(logdir
)-1) y
=sizeof(logdir
)-2;
315 } else if((str
=get_param_value("log",buf
))!=NULL
) {
316 if((str2
=get_param_value("anonymous",str
))!=NULL
)
320 If logdir is defined, we prepend it to the log file name, otherwise, we assume
321 the log directive provides an absolute file name to the log file. Therefore,
322 we don't need to add an additional / at the beginning of the log file name.
324 y
=(logdir
[0]) ? sprintf(wentp
,"%s/",logdir
) : 0;
326 Spaces are allowed in the name of the log file. The file name ends at the first #
327 because it is assumed it is an end of line comment. Any space before the # is then
328 removed. Any control character (i.e. a character with a code lower than 32) ends
329 the file name. That includes the terminating zero.
331 while((unsigned char)*str
>=' ' && *str
!='#' && y
<sizeof(wentp
)-1)
335 while(*str
==' ' && y
>0) {
341 read_log(wentp
,fp_ou
,dfrom
,duntil
);
346 if (fp_guard
) fclose(fp_guard
);
347 if (fp_ou
) fclose(fp_ou
);
350 for (y
=0; y
<nfiles_done
; y
++)
351 if (files_done
[y
]) free(files_done
[y
]);
356 debuga(_("Sorting file: %s\n"),guard_ou
);
359 sprintf(tmp6
,"sort -k 1,1 -k 2,2 -k 4,4 \"%s\" -o \"%s\"",guard_in
, guard_ou
);
360 cstatus
=system(tmp6
);
361 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
362 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
363 debuga(_("sort command: %s\n"),tmp6
);