]> git.ipfire.org Git - thirdparty/sarg.git/blame - download.c
mkdir is really a mess on MinGW
[thirdparty/sarg.git] / download.c
CommitLineData
25697a35 1/*
94ff9470 2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
67302a9e 3 * 1998, 2013
25697a35
GS
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
1164c474
FM
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
25697a35
GS
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
5f3cfd1d 28#include "include/defs.h"
f83d7b44 29#include "include/readlog.h"
25697a35 30
b2fa3eb6
FM
31/*!
32The buffer to store the list of the suffixes to take into account when generating
33the report of the downloaded files. The suffixes in the list are separated by the ASCII
34null.
35*/
2824ec9b 36/*@null@*/static char *DownloadSuffix=NULL;
b2fa3eb6
FM
37
38/*!
39The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically.
40to speed up the search.
41*/
2824ec9b 42/*@null@*/static char **DownloadSuffixIndex=NULL;
b2fa3eb6
FM
43
44/*!
45The number of suffixes in ::DownloadSuffixIndex.
46*/
6e792ade
FM
47static int NDownloadSuffix=0;
48
11284535
FM
49//! Name of the file containing the unsorted downloaded entries.
50static char download_unsort[MAXLEN]="";
51//! The file handle to write the entries.
52static FILE *fp_download=NULL;
53//! \c True if at least one downloaded entry exists.
54static bool download_exists=false;
55
56/*!
57Open a file to store the denied accesses.
58
59\return The file handle or NULL if no file is necessary.
60*/
61void download_open(void)
62{
63 if ((ReportType & REPORT_TYPE_DOWNLOADS) == 0) {
cb59dc47 64 if (debugz>=LogLevel_Process) debugaz(_("Download report not produced as it is not requested\n"));
11284535
FM
65 return;
66 }
67 if (Privacy) {
cb59dc47 68 if (debugz>=LogLevel_Process) debugaz(_("Download report not produced because privacy option is active\n"));
11284535
FM
69 return;
70 }
71
72 snprintf(download_unsort,sizeof(download_unsort),"%s/download.int_unsort",tmp);
73 if ((fp_download=MY_FOPEN(download_unsort,"w"))==NULL) {
d6f0349d 74 debuga(_("(log) Cannot open file %s: %s\n"),download_unsort,strerror(errno));
11284535
FM
75 exit(EXIT_FAILURE);
76 }
77 return;
78}
79
80/*!
81Write one entry in the unsorted downloaded file provided that it is required.
82
83\param log_entry The entry to write into the log file.
84\param url The URL of the downloaded file.
85*/
86void download_write(const struct ReadLogStruct *log_entry,const char *url)
87{
88 char date[80];
89
88776d28 90 if (fp_download && strstr(log_entry->HttpCode,"DENIED") == 0) {
11284535
FM
91 strftime(date,sizeof(date),"%d/%m/%Y\t%H:%M:%S",&log_entry->EntryTime);
92 fprintf(fp_download,"%s\t%s\t%s\t%s\n",date,log_entry->User,log_entry->Ip,url);
93 download_exists=true;
94 }
95}
96
97/*!
98Close the file opened by denied_open().
99*/
100void download_close(void)
101{
102 if (fp_download)
103 {
507460ae 104 if (fclose(fp_download)==EOF) {
11284535
FM
105 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
106 exit(EXIT_FAILURE);
107 }
108 fp_download=NULL;
109 }
110}
111
112/*!
113Tell the caller if a download report exists.
114
115\return \c True if the report is available or \c false if no report
116was generated.
117*/
118bool is_download(void)
119{
120 return(download_exists);
121}
122
b2fa3eb6
FM
123/*!
124Sort the raw log file with the downloaded files.
125
126\param report_in The name of the file where to store the sorted entries.
73b57f55
FM
127
128The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
129date, the time and the URL.
b2fa3eb6
FM
130*/
131static void download_sort(const char *report_in)
132{
133 int clen;
134 char csort[MAXLEN];
135 int cstatus;
bd43d81f 136
11284535
FM
137 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s\"",
138 tmp, report_in, download_unsort);
b2fa3eb6 139 if (clen>=sizeof(csort)) {
11284535 140 debuga(_("Path too long to sort the file: %s\n"),download_unsort);
b2fa3eb6
FM
141 exit(EXIT_FAILURE);
142 }
143 cstatus=system(csort);
144 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
145 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
146 debuga(_("sort command: %s\n"),csort);
147 exit(EXIT_FAILURE);
148 }
11284535
FM
149 if (!KeepTempLog) {
150 if (unlink(download_unsort)) {
151 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
152 exit(EXIT_FAILURE);
153 }
154 download_unsort[0]='\0';
b2fa3eb6
FM
155 }
156}
157
158/*!
159Generate the report of the downloaded files. The list of the suffixes to take into account
160is set with set_download_suffix().
161*/
32e71fa4 162void download_report(void)
25697a35 163{
9bd92830
FM
164 FILE *fp_in = NULL, *fp_ou = NULL;
165
166 char *buf;
167 char *url;
168 char report_in[MAXLEN];
169 char report[MAXLEN];
170 char ip[MAXLEN];
171 char oip[MAXLEN];
172 char user[MAXLEN];
173 char ouser[MAXLEN];
174 char ouser2[MAXLEN];
175 char data[15];
176 char hora[15];
177 int z=0;
178 int count=0;
179 int i;
180 int day,month,year;
181 bool new_user;
182 struct getwordstruct gwarea;
183 longline line;
184 struct userinfostruct *uinfo;
185 struct tm t;
186
11284535
FM
187 if (!download_exists) {
188 if (!KeepTempLog && download_unsort[0]!='\0' && unlink(download_unsort))
189 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
190 download_unsort[0]='\0';
cb59dc47 191 if (debugz>=LogLevel_Process) debugaz(_("No downloaded files to report\n"));
5589b847
FM
192 return;
193 }
194
b7413c4c
FM
195 if (debugz>=LogLevel_Process)
196 debuga(_("Creating download report...\n"));
9bd92830
FM
197 ouser[0]='\0';
198 ouser2[0]='\0';
199
b2fa3eb6 200 // sort the raw file
c98d6a0f 201 snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
b2fa3eb6 202 download_sort(report_in);
9bd92830 203
b2fa3eb6 204 // produce the report.
9bd92830
FM
205 snprintf(report,sizeof(report),"%s/download.html",outdirname);
206
207 if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
d6f0349d 208 debuga(_("(download) Cannot open log file %s: %s\n"),report_in,strerror(errno));
9bd92830
FM
209 exit(EXIT_FAILURE);
210 }
211
212 if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
d6f0349d 213 debuga(_("(download) Cannot open log file %s: %s\n"),report,strerror(errno));
9bd92830
FM
214 exit(EXIT_FAILURE);
215 }
216
217 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
218 fputs("<tr><td class=\"header_c\">",fp_ou);
219 fprintf(fp_ou,_("Period: %s"),period.html);
220 fputs("</td></tr>\n",fp_ou);
221 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
222 close_html_header(fp_ou);
223
224 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
225 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
226
227 if ((line=longline_create())==NULL) {
228 debuga(_("Not enough memory to read the downloaded files\n"));
229 exit(EXIT_FAILURE);
230 }
231
232 while((buf=longline_read(fp_in,line))!=NULL) {
233 getword_start(&gwarea,buf);
234 if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
007905af 235 getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
9bd92830
FM
236 debuga(_("There is a broken record or garbage in file %s\n"),report_in);
237 exit(EXIT_FAILURE);
238 }
239 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
240 debuga(_("There is a broken url in file %s\n"),report_in);
241 exit(EXIT_FAILURE);
242 }
243 if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
244 computedate(year,month,day,&t);
245 strftime(data,sizeof(data),"%x",&t);
246
247 uinfo=userinfo_find_from_id(user);
248 if (!uinfo) {
249 debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
250 exit(EXIT_FAILURE);
251 }
252 new_user=false;
253 if(!z) {
254 strcpy(ouser,user);
255 strcpy(oip,ip);
256 z++;
257 new_user=true;
258 } else {
259 if(strcmp(ouser,user) != 0) {
260 strcpy(ouser,user);
261 new_user=true;
262 }
263 if(strcmp(oip,ip) != 0) {
264 strcpy(oip,ip);
265 new_user=true;
266 }
267 }
268
269 if(DownloadReportLimit) {
270 if(strcmp(ouser2,uinfo->label) == 0) {
271 count++;
272 } else {
273 count=1;
274 strcpy(ouser2,uinfo->label);
275 }
276 if(count >= DownloadReportLimit)
277 continue;
278 }
279
280 for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;
281
282 fputs("<tr>",fp_ou);
5138c1b9
FM
283 if (new_user) {
284 if (uinfo->topuser)
285 fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
286 else
287 fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
288 } else
9bd92830
FM
289 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
290 fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
67a93701 291 if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
9bd92830
FM
292 fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
293 output_html_url(fp_ou,url);
294 fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
295 }
6fa33a32 296 output_html_link(fp_ou,url,100);
67a93701 297 fputs("</td></tr>\n",fp_ou);
9bd92830
FM
298 }
299 fclose(fp_in);
300 longline_destroy(&line);
301
302 fputs("</table></div>\n",fp_ou);
303 if (write_html_trailer(fp_ou)<0)
304 debuga(_("Write error in file %s\n"),report);
507460ae
FM
305 if (fclose(fp_ou)==EOF) {
306 debuga(_("Write error in %s: %s\n"),report,strerror(errno));
307 exit(EXIT_FAILURE);
308 }
9bd92830 309
11767c6a
FM
310 if (!KeepTempLog && unlink(report_in)) {
311 debuga(_("Cannot delete \"%s\": %s\n"),report_in,strerror(errno));
08f9b029
FM
312 exit(EXIT_FAILURE);
313 }
9bd92830
FM
314
315 return;
25697a35 316}
6e792ade 317
b2fa3eb6
FM
318/*!
319Free the memory allocated by set_download_suffix().
320*/
6e792ade
FM
321void free_download(void)
322{
9bd92830
FM
323 if (DownloadSuffix) {
324 free(DownloadSuffix);
325 DownloadSuffix=NULL;
326 }
327 if (DownloadSuffixIndex) {
328 free(DownloadSuffixIndex);
329 DownloadSuffixIndex=NULL;
330 }
331 NDownloadSuffix=0;
6e792ade
FM
332}
333
b2fa3eb6
FM
334/*!
335Set the list of the suffixes corresponding to the download of files you want to detect with
336is_download_suffix(). The list is sorted to make the search faster.
337
338\param list A comma separated list of the suffixes to set in ::DownloadSuffix.
339
340\note The memory allocated by this function must be freed by free_download().
341*/
6e792ade
FM
342void set_download_suffix(const char *list)
343{
9bd92830
FM
344 char *str;
345 int i, j, k;
346 int cmp;
347
348 free_download();
349
350 DownloadSuffix=strdup(list);
351 if (!DownloadSuffix) {
352 debuga(_("Download suffix list too long\n"));
353 exit(EXIT_FAILURE);
354 }
355 j = 1;
356 for (i=0 ; list[i] ; i++)
357 if (list[i] == ',') j++;
358 DownloadSuffixIndex=malloc(j*sizeof(char *));
359 if (!DownloadSuffixIndex) {
360 debuga(_("Too many download suffixes\n"));
361 exit(EXIT_FAILURE);
362 }
363
364 str = DownloadSuffix;
365 for (i=0 ; DownloadSuffix[i] ; i++) {
366 if (DownloadSuffix[i] == ',') {
367 DownloadSuffix[i] = '\0';
368 if (*str) {
369 cmp = -1;
370 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
371 if (cmp != 0) {
372 for (k=NDownloadSuffix ; k>j ; k--)
373 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
374 NDownloadSuffix++;
375 DownloadSuffixIndex[j]=str;
376 }
377 }
378 str=DownloadSuffix+i+1;
379 }
380 }
381
382 if (*str) {
383 cmp = -1;
384 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
385 if (cmp != 0) {
386 for (k=NDownloadSuffix ; k>j ; k--)
387 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
388 NDownloadSuffix++;
389 DownloadSuffixIndex[j]=str;
390 }
391 }
6e792ade
FM
392}
393
b2fa3eb6
FM
394/*!
395Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
396URL with a maximum of 9 characters and compare it to the list of the download suffix in
397::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
398of a file.
399
400\param url The URL to test.
401
402\retval 1 The URL matches a suffix of a download.
403\retval 0 The URL is not a known download.
404
405\note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
406that ends with the file name can be detected.
407
408\note A URL embedding another web site's address ending by .com at the end of the URL will match the download
409extension com if it is defined in the ::DownloadSuffix.
410*/
2824ec9b 411bool is_download_suffix(const char *url)
6e792ade 412{
9bd92830
FM
413 int urllen;
414 int i;
415 int down, up, center;
416 const char *suffix;
417 int cmp;
418 const int max_suffix=10;
419
420 if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);
421
422 urllen=strlen(url)-1;
423 if (urllen<=0) return(false);
424 if (url[urllen] == '.') return(false); //reject a single trailing dot
425 for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
426 if (i>=urllen) return(false); // url is a hostname without any path or file to download
427
428 for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
429 if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
430 if (i>max_suffix || i>=urllen) return(false);
431
432 suffix=url+urllen-i+1;
433 down=0;
434 up=NDownloadSuffix-1;
435 while (down<=up) {
436 center=(down+up)/2;
437 cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
438 if (cmp == 0) return(true);
439 if (cmp < 0)
440 up = center-1;
441 else
442 down = center+1;
443 }
444 return(false);
6e792ade
FM
445}
446
11284535
FM
447/*!
448Remove any temporary file left by the download module.
449*/
450void download_cleanup(void)
451{
507460ae
FM
452 if (fp_download) {
453 if (fclose(fp_download)==EOF) {
454 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
455 exit(EXIT_FAILURE);
456 }
11284535
FM
457 fp_download=NULL;
458 }
459 if (download_unsort[0]) {
460 if (unlink(download_unsort)==-1)
461 debuga(_("Failed to delete %s: %s\n"),download_unsort,strerror(errno));
462 }
463}