]> git.ipfire.org Git - thirdparty/sarg.git/blame - download.c
Remove trailing spaces in every source file
[thirdparty/sarg.git] / download.c
CommitLineData
25697a35 1/*
94ff9470 2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
61d965f3 3 * 1998, 2012
25697a35
GS
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
1164c474
FM
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
25697a35
GS
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
5f3cfd1d 28#include "include/defs.h"
25697a35 29
b2fa3eb6
FM
/*!
Storage for the suffixes to take into account when generating the report of the
downloaded files. The suffixes are stored one after the other, separated by the
ASCII null character.
*/
/*@null@*/static char *DownloadSuffix = NULL;

/*!
Index of the suffixes stored in ::DownloadSuffix. The entries are kept sorted
alphabetically (ignoring the case) to speed up the search.
*/
/*@null@*/static char **DownloadSuffixIndex = NULL;

/*!
Number of suffixes referenced by ::DownloadSuffixIndex.
*/
static int NDownloadSuffix = 0;
47
b2fa3eb6
FM
48/*!
49Sort the raw log file with the downloaded files.
50
51\param report_in The name of the file where to store the sorted entries.
73b57f55
FM
52
53The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
54date, the time and the URL.
b2fa3eb6
FM
55*/
56static void download_sort(const char *report_in)
57{
58 int clen;
59 char csort[MAXLEN];
60 int cstatus;
bd43d81f 61
c98d6a0f 62 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"",
b2fa3eb6
FM
63 tmp, report_in, tmp);
64 if (clen>=sizeof(csort)) {
c98d6a0f 65 debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp);
b2fa3eb6
FM
66 exit(EXIT_FAILURE);
67 }
68 cstatus=system(csort);
69 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
70 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
71 debuga(_("sort command: %s\n"),csort);
72 exit(EXIT_FAILURE);
73 }
c98d6a0f
FM
74 if (snprintf(csort,sizeof(csort),"%s/download.int_unsort",tmp)>=sizeof(csort)) {
75 debuga(_("Path too long for %s/download.int_unsort\n"),tmp);
b2fa3eb6
FM
76 exit(EXIT_FAILURE);
77 }
11767c6a
FM
78 if (!KeepTempLog && unlink(csort)) {
79 debuga(_("Cannot delete \"%s\": %s\n"),csort,strerror(errno));
b2fa3eb6
FM
80 exit(EXIT_FAILURE);
81 }
82}
83
84/*!
85Generate the report of the downloaded files. The list of the suffixes to take into account
86is set with set_download_suffix().
87*/
32e71fa4 88void download_report(void)
25697a35 89{
9bd92830
FM
90 FILE *fp_in = NULL, *fp_ou = NULL;
91
92 char *buf;
93 char *url;
94 char report_in[MAXLEN];
95 char report[MAXLEN];
96 char ip[MAXLEN];
97 char oip[MAXLEN];
98 char user[MAXLEN];
99 char ouser[MAXLEN];
100 char ouser2[MAXLEN];
101 char data[15];
102 char hora[15];
103 int z=0;
104 int count=0;
105 int i;
106 int day,month,year;
107 bool new_user;
108 struct getwordstruct gwarea;
109 longline line;
110 struct userinfostruct *uinfo;
111 struct tm t;
112
5589b847
FM
113 if (!ndownload) {
114 if (debugz) debugaz(_("No downloaded files to report\n"));
115 return;
116 }
117
9bd92830
FM
118 ouser[0]='\0';
119 ouser2[0]='\0';
120
b2fa3eb6 121 // sort the raw file
c98d6a0f 122 snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
b2fa3eb6 123 download_sort(report_in);
9f93fec3 124 if(access(report_in, R_OK) != 0) {
5589b847
FM
125 debugaz(_("Sorted file doesn't exist (to produce the download report)\n"));
126 exit(EXIT_FAILURE);
9f93fec3 127 }
9bd92830 128
b2fa3eb6 129 // produce the report.
9bd92830
FM
130 snprintf(report,sizeof(report),"%s/download.html",outdirname);
131
132 if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
133 debuga(_("(download) Cannot open log file %s\n"),report_in);
134 exit(EXIT_FAILURE);
135 }
136
137 if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
138 debuga(_("(download) Cannot open log file %s\n"),report);
139 exit(EXIT_FAILURE);
140 }
141
142 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
143 fputs("<tr><td class=\"header_c\">",fp_ou);
144 fprintf(fp_ou,_("Period: %s"),period.html);
145 fputs("</td></tr>\n",fp_ou);
146 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
147 close_html_header(fp_ou);
148
149 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
150 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
151
152 if ((line=longline_create())==NULL) {
153 debuga(_("Not enough memory to read the downloaded files\n"));
154 exit(EXIT_FAILURE);
155 }
156
157 while((buf=longline_read(fp_in,line))!=NULL) {
158 getword_start(&gwarea,buf);
159 if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
007905af 160 getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
9bd92830
FM
161 debuga(_("There is a broken record or garbage in file %s\n"),report_in);
162 exit(EXIT_FAILURE);
163 }
164 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
165 debuga(_("There is a broken url in file %s\n"),report_in);
166 exit(EXIT_FAILURE);
167 }
168 if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
169 computedate(year,month,day,&t);
170 strftime(data,sizeof(data),"%x",&t);
171
172 uinfo=userinfo_find_from_id(user);
173 if (!uinfo) {
174 debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
175 exit(EXIT_FAILURE);
176 }
177 new_user=false;
178 if(!z) {
179 strcpy(ouser,user);
180 strcpy(oip,ip);
181 z++;
182 new_user=true;
183 } else {
184 if(strcmp(ouser,user) != 0) {
185 strcpy(ouser,user);
186 new_user=true;
187 }
188 if(strcmp(oip,ip) != 0) {
189 strcpy(oip,ip);
190 new_user=true;
191 }
192 }
193
194 if(DownloadReportLimit) {
195 if(strcmp(ouser2,uinfo->label) == 0) {
196 count++;
197 } else {
198 count=1;
199 strcpy(ouser2,uinfo->label);
200 }
201 if(count >= DownloadReportLimit)
202 continue;
203 }
204
205 for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;
206
207 fputs("<tr>",fp_ou);
5138c1b9
FM
208 if (new_user) {
209 if (uinfo->topuser)
210 fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
211 else
212 fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
213 } else
9bd92830
FM
214 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
215 fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
67a93701 216 if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
9bd92830
FM
217 fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
218 output_html_url(fp_ou,url);
219 fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
220 }
6fa33a32 221 output_html_link(fp_ou,url,100);
67a93701 222 fputs("</td></tr>\n",fp_ou);
9bd92830
FM
223 }
224 fclose(fp_in);
225 longline_destroy(&line);
226
227 fputs("</table></div>\n",fp_ou);
228 if (write_html_trailer(fp_ou)<0)
229 debuga(_("Write error in file %s\n"),report);
230 if (fclose(fp_ou)==EOF)
231 debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));
232
11767c6a
FM
233 if (!KeepTempLog && unlink(report_in)) {
234 debuga(_("Cannot delete \"%s\": %s\n"),report_in,strerror(errno));
08f9b029
FM
235 exit(EXIT_FAILURE);
236 }
9bd92830
FM
237
238 return;
25697a35 239}
6e792ade 240
b2fa3eb6
FM
241/*!
242Free the memory allocated by set_download_suffix().
243*/
6e792ade
FM
244void free_download(void)
245{
9bd92830
FM
246 if (DownloadSuffix) {
247 free(DownloadSuffix);
248 DownloadSuffix=NULL;
249 }
250 if (DownloadSuffixIndex) {
251 free(DownloadSuffixIndex);
252 DownloadSuffixIndex=NULL;
253 }
254 NDownloadSuffix=0;
6e792ade
FM
255}
256
b2fa3eb6
FM
257/*!
258Set the list of the suffixes corresponding to the download of files you want to detect with
259is_download_suffix(). The list is sorted to make the search faster.
260
261\param list A comma separated list of the suffixes to set in ::DownloadSuffix.
262
263\note The memory allocated by this function must be freed by free_download().
264*/
6e792ade
FM
265void set_download_suffix(const char *list)
266{
9bd92830
FM
267 char *str;
268 int i, j, k;
269 int cmp;
270
271 free_download();
272
273 DownloadSuffix=strdup(list);
274 if (!DownloadSuffix) {
275 debuga(_("Download suffix list too long\n"));
276 exit(EXIT_FAILURE);
277 }
278 j = 1;
279 for (i=0 ; list[i] ; i++)
280 if (list[i] == ',') j++;
281 DownloadSuffixIndex=malloc(j*sizeof(char *));
282 if (!DownloadSuffixIndex) {
283 debuga(_("Too many download suffixes\n"));
284 exit(EXIT_FAILURE);
285 }
286
287 str = DownloadSuffix;
288 for (i=0 ; DownloadSuffix[i] ; i++) {
289 if (DownloadSuffix[i] == ',') {
290 DownloadSuffix[i] = '\0';
291 if (*str) {
292 cmp = -1;
293 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
294 if (cmp != 0) {
295 for (k=NDownloadSuffix ; k>j ; k--)
296 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
297 NDownloadSuffix++;
298 DownloadSuffixIndex[j]=str;
299 }
300 }
301 str=DownloadSuffix+i+1;
302 }
303 }
304
305 if (*str) {
306 cmp = -1;
307 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
308 if (cmp != 0) {
309 for (k=NDownloadSuffix ; k>j ; k--)
310 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
311 NDownloadSuffix++;
312 DownloadSuffixIndex[j]=str;
313 }
314 }
6e792ade
FM
315}
316
b2fa3eb6
FM
317/*!
318Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
319URL with a maximum of 9 characters and compare it to the list of the download suffix in
320::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
321of a file.
322
323\param url The URL to test.
324
325\retval 1 The URL matches a suffix of a download.
326\retval 0 The URL is not a known download.
327
328\note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
329that ends with the file name can be detected.
330
331\note A URL embedding another web site's address ending by .com at the end of the URL will match the download
332extension com if it is defined in the ::DownloadSuffix.
333*/
2824ec9b 334bool is_download_suffix(const char *url)
6e792ade 335{
9bd92830
FM
336 int urllen;
337 int i;
338 int down, up, center;
339 const char *suffix;
340 int cmp;
341 const int max_suffix=10;
342
343 if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);
344
345 urllen=strlen(url)-1;
346 if (urllen<=0) return(false);
347 if (url[urllen] == '.') return(false); //reject a single trailing dot
348 for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
349 if (i>=urllen) return(false); // url is a hostname without any path or file to download
350
351 for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
352 if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
353 if (i>max_suffix || i>=urllen) return(false);
354
355 suffix=url+urllen-i+1;
356 down=0;
357 up=NDownloadSuffix-1;
358 while (down<=up) {
359 center=(down+up)/2;
360 cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
361 if (cmp == 0) return(true);
362 if (cmp < 0)
363 up = center-1;
364 else
365 down = center+1;
366 }
367 return(false);
6e792ade
FM
368}
369