]> git.ipfire.org Git - thirdparty/sarg.git/blob - download.c
Merge the tiny charset name convertor function
[thirdparty/sarg.git] / download.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2013
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30
31 /*!
32 The buffer to store the list of the suffixes to take into account when generating
33 the report of the downloaded files. The suffixes in the list are separated by the ASCII
34 NUL character. The buffer is allocated by set_download_suffix() and released by free_download().
35 */
36 /*@null@*/static char *DownloadSuffix=NULL;
37
38 /*!
39 The index of all the suffixes stored in ::DownloadSuffix. Each entry points into
40 ::DownloadSuffix. The list is sorted alphabetically, ignoring the case, to speed up the search.
41 */
42 /*@null@*/static char **DownloadSuffixIndex=NULL;
43
44 /*!
45 The number of suffixes in ::DownloadSuffixIndex.
46 */
47 static int NDownloadSuffix=0;
48
49 //! Name of the file containing the unsorted downloaded entries. Empty if there is no such file.
50 static char download_unsort[MAXLEN]="";
51 //! The file handle to write the entries. NULL when the file is not open.
52 static FILE *fp_download=NULL;
53 //! \c True if at least one downloaded entry exists.
54 static bool download_exists=false;
55
56 /*!
57 Open a file to store the denied accesses.
58
59 \return The file handle or NULL if no file is necessary.
60 */
61 void download_open(void)
62 {
63 if ((ReportType & REPORT_TYPE_DOWNLOADS) == 0) {
64 if (debugz) debugaz(_("Download report not produced as it is not requested\n"));
65 return;
66 }
67 if (Privacy) {
68 if (debugz) debugaz(_("Download report not produced because privacy option is active\n"));
69 return;
70 }
71
72 snprintf(download_unsort,sizeof(download_unsort),"%s/download.int_unsort",tmp);
73 if ((fp_download=MY_FOPEN(download_unsort,"w"))==NULL) {
74 debuga(_("(log) Cannot open file %s: %s\n"),download_unsort,strerror(errno));
75 exit(EXIT_FAILURE);
76 }
77 return;
78 }
79
80 /*!
81 Write one entry in the unsorted downloaded file provided that it is required.
82
83 \param log_entry The entry to write into the log file.
84 \param url The URL of the downloaded file.
85 */
86 void download_write(const struct ReadLogStruct *log_entry,const char *url)
87 {
88 char date[80];
89
90 if (fp_download && strstr(log_entry->HttpCode,"DENIED") == 0) {
91 strftime(date,sizeof(date),"%d/%m/%Y\t%H:%M:%S",&log_entry->EntryTime);
92 fprintf(fp_download,"%s\t%s\t%s\t%s\n",date,log_entry->User,log_entry->Ip,url);
93 download_exists=true;
94 }
95 }
96
97 /*!
98 Close the file opened by denied_open().
99 */
100 void download_close(void)
101 {
102 if (fp_download)
103 {
104 if (fclose(fp_download)==EOF) {
105 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
106 exit(EXIT_FAILURE);
107 }
108 fp_download=NULL;
109 }
110 }
111
112 /*!
113 Tell the caller if a download report exists.
114
115 \return \c True if the report is available or \c false if no report
116 was generated.
117 */
118 bool is_download(void)
119 {
120 return(download_exists);
121 }
122
123 /*!
124 Sort the raw log file with the downloaded files.
125
126 \param report_in The name of the file where to store the sorted entries.
127
128 The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
129 date, the time and the URL.
130 */
131 static void download_sort(const char *report_in)
132 {
133 int clen;
134 char csort[MAXLEN];
135 int cstatus;
136
137 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s\"",
138 tmp, report_in, download_unsort);
139 if (clen>=sizeof(csort)) {
140 debuga(_("Path too long to sort the file: %s\n"),download_unsort);
141 exit(EXIT_FAILURE);
142 }
143 cstatus=system(csort);
144 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
145 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
146 debuga(_("sort command: %s\n"),csort);
147 exit(EXIT_FAILURE);
148 }
149 if (!KeepTempLog) {
150 if (unlink(download_unsort)) {
151 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
152 exit(EXIT_FAILURE);
153 }
154 download_unsort[0]='\0';
155 }
156 }
157
158 /*!
159 Generate the report of the downloaded files. The list of the suffixes to take into account
160 is set with set_download_suffix().
161 */
162 void download_report(void)
163 {
164 FILE *fp_in = NULL, *fp_ou = NULL;
165
166 char *buf;
167 char *url;
168 char report_in[MAXLEN];
169 char report[MAXLEN];
170 char ip[MAXLEN];
171 char oip[MAXLEN];
172 char user[MAXLEN];
173 char ouser[MAXLEN];
174 char ouser2[MAXLEN];
175 char data[15];
176 char hora[15];
177 int z=0;
178 int count=0;
179 int i;
180 int day,month,year;
181 bool new_user;
182 struct getwordstruct gwarea;
183 longline line;
184 struct userinfostruct *uinfo;
185 struct tm t;
186
187 if (!download_exists) {
188 if (!KeepTempLog && download_unsort[0]!='\0' && unlink(download_unsort))
189 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
190 download_unsort[0]='\0';
191 if (debugz) debugaz(_("No downloaded files to report\n"));
192 return;
193 }
194
195 ouser[0]='\0';
196 ouser2[0]='\0';
197
198 // sort the raw file
199 snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
200 download_sort(report_in);
201
202 // produce the report.
203 snprintf(report,sizeof(report),"%s/download.html",outdirname);
204
205 if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
206 debuga(_("(download) Cannot open log file %s: %s\n"),report_in,strerror(errno));
207 exit(EXIT_FAILURE);
208 }
209
210 if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
211 debuga(_("(download) Cannot open log file %s: %s\n"),report,strerror(errno));
212 exit(EXIT_FAILURE);
213 }
214
215 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
216 fputs("<tr><td class=\"header_c\">",fp_ou);
217 fprintf(fp_ou,_("Period: %s"),period.html);
218 fputs("</td></tr>\n",fp_ou);
219 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
220 close_html_header(fp_ou);
221
222 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
223 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
224
225 if ((line=longline_create())==NULL) {
226 debuga(_("Not enough memory to read the downloaded files\n"));
227 exit(EXIT_FAILURE);
228 }
229
230 while((buf=longline_read(fp_in,line))!=NULL) {
231 getword_start(&gwarea,buf);
232 if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
233 getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
234 debuga(_("There is a broken record or garbage in file %s\n"),report_in);
235 exit(EXIT_FAILURE);
236 }
237 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
238 debuga(_("There is a broken url in file %s\n"),report_in);
239 exit(EXIT_FAILURE);
240 }
241 if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
242 computedate(year,month,day,&t);
243 strftime(data,sizeof(data),"%x",&t);
244
245 uinfo=userinfo_find_from_id(user);
246 if (!uinfo) {
247 debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
248 exit(EXIT_FAILURE);
249 }
250 new_user=false;
251 if(!z) {
252 strcpy(ouser,user);
253 strcpy(oip,ip);
254 z++;
255 new_user=true;
256 } else {
257 if(strcmp(ouser,user) != 0) {
258 strcpy(ouser,user);
259 new_user=true;
260 }
261 if(strcmp(oip,ip) != 0) {
262 strcpy(oip,ip);
263 new_user=true;
264 }
265 }
266
267 if(DownloadReportLimit) {
268 if(strcmp(ouser2,uinfo->label) == 0) {
269 count++;
270 } else {
271 count=1;
272 strcpy(ouser2,uinfo->label);
273 }
274 if(count >= DownloadReportLimit)
275 continue;
276 }
277
278 for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;
279
280 fputs("<tr>",fp_ou);
281 if (new_user) {
282 if (uinfo->topuser)
283 fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
284 else
285 fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
286 } else
287 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
288 fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
289 if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
290 fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
291 output_html_url(fp_ou,url);
292 fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
293 }
294 output_html_link(fp_ou,url,100);
295 fputs("</td></tr>\n",fp_ou);
296 }
297 fclose(fp_in);
298 longline_destroy(&line);
299
300 fputs("</table></div>\n",fp_ou);
301 if (write_html_trailer(fp_ou)<0)
302 debuga(_("Write error in file %s\n"),report);
303 if (fclose(fp_ou)==EOF) {
304 debuga(_("Write error in %s: %s\n"),report,strerror(errno));
305 exit(EXIT_FAILURE);
306 }
307
308 if (!KeepTempLog && unlink(report_in)) {
309 debuga(_("Cannot delete \"%s\": %s\n"),report_in,strerror(errno));
310 exit(EXIT_FAILURE);
311 }
312
313 return;
314 }
315
316 /*!
317 Free the memory allocated by set_download_suffix().
318 */
319 void free_download(void)
320 {
321 if (DownloadSuffix) {
322 free(DownloadSuffix);
323 DownloadSuffix=NULL;
324 }
325 if (DownloadSuffixIndex) {
326 free(DownloadSuffixIndex);
327 DownloadSuffixIndex=NULL;
328 }
329 NDownloadSuffix=0;
330 }
331
332 /*!
333 Set the list of the suffixes corresponding to the download of files you want to detect with
334 is_download_suffix(). The list is sorted to make the search faster.
335
336 \param list A comma separated list of the suffixes to set in ::DownloadSuffix.
337
338 \note The memory allocated by this function must be freed by free_download().
339 */
340 void set_download_suffix(const char *list)
341 {
342 char *str;
343 int i, j, k;
344 int cmp;
345
346 free_download();
347
348 DownloadSuffix=strdup(list);
349 if (!DownloadSuffix) {
350 debuga(_("Download suffix list too long\n"));
351 exit(EXIT_FAILURE);
352 }
353 j = 1;
354 for (i=0 ; list[i] ; i++)
355 if (list[i] == ',') j++;
356 DownloadSuffixIndex=malloc(j*sizeof(char *));
357 if (!DownloadSuffixIndex) {
358 debuga(_("Too many download suffixes\n"));
359 exit(EXIT_FAILURE);
360 }
361
362 str = DownloadSuffix;
363 for (i=0 ; DownloadSuffix[i] ; i++) {
364 if (DownloadSuffix[i] == ',') {
365 DownloadSuffix[i] = '\0';
366 if (*str) {
367 cmp = -1;
368 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
369 if (cmp != 0) {
370 for (k=NDownloadSuffix ; k>j ; k--)
371 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
372 NDownloadSuffix++;
373 DownloadSuffixIndex[j]=str;
374 }
375 }
376 str=DownloadSuffix+i+1;
377 }
378 }
379
380 if (*str) {
381 cmp = -1;
382 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
383 if (cmp != 0) {
384 for (k=NDownloadSuffix ; k>j ; k--)
385 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
386 NDownloadSuffix++;
387 DownloadSuffixIndex[j]=str;
388 }
389 }
390 }
391
392 /*!
393 Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
394 URL with a maximum of 9 characters and compare it to the list of the download suffix in
395 ::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
396 of a file.
397
398 \param url The URL to test.
399
400 \retval 1 The URL matches a suffix of a download.
401 \retval 0 The URL is not a known download.
402
403 \note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
404 that ends with the file name can be detected.
405
406 \note A URL embedding another web site's address ending by .com at the end of the URL will match the download
407 extension com if it is defined in the ::DownloadSuffix.
408 */
409 bool is_download_suffix(const char *url)
410 {
411 int urllen;
412 int i;
413 int down, up, center;
414 const char *suffix;
415 int cmp;
416 const int max_suffix=10;
417
418 if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);
419
420 urllen=strlen(url)-1;
421 if (urllen<=0) return(false);
422 if (url[urllen] == '.') return(false); //reject a single trailing dot
423 for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
424 if (i>=urllen) return(false); // url is a hostname without any path or file to download
425
426 for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
427 if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
428 if (i>max_suffix || i>=urllen) return(false);
429
430 suffix=url+urllen-i+1;
431 down=0;
432 up=NDownloadSuffix-1;
433 while (down<=up) {
434 center=(down+up)/2;
435 cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
436 if (cmp == 0) return(true);
437 if (cmp < 0)
438 up = center-1;
439 else
440 down = center+1;
441 }
442 return(false);
443 }
444
445 /*!
446 Remove any temporary file left by the download module.
447 */
448 void download_cleanup(void)
449 {
450 if (fp_download) {
451 if (fclose(fp_download)==EOF) {
452 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
453 exit(EXIT_FAILURE);
454 }
455 fp_download=NULL;
456 }
457 if (download_unsort[0]) {
458 if (unlink(download_unsort)==-1)
459 debuga(_("Failed to delete %s: %s\n"),download_unsort,strerror(errno));
460 }
461 }