]> git.ipfire.org Git - thirdparty/sarg.git/blob - download.c
Display some debug messages when -z is used to track the progress
[thirdparty/sarg.git] / download.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2013
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #include "include/readlog.h"
30
31 /*!
32 The buffer to store the list of the suffixes to take into account when generating
33 the report of the downloaded files. The suffixes in the list are separated by the ASCII
34 null.
35 */
36 /*@null@*/static char *DownloadSuffix=NULL;
37
38 /*!
39 The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically.
40 to speed up the search.
41 */
42 /*@null@*/static char **DownloadSuffixIndex=NULL;
43
44 /*!
45 The number of suffixes in ::DownloadSuffixIndex.
46 */
47 static int NDownloadSuffix=0;
48
49 //! Name of the file containing the unsorted downloaded entries.
50 static char download_unsort[MAXLEN]="";
51 //! The file handle to write the entries.
52 static FILE *fp_download=NULL;
53 //! \c True if at least one downloaded entry exists.
54 static bool download_exists=false;
55
56 /*!
57 Open a file to store the denied accesses.
58
59 \return The file handle or NULL if no file is necessary.
60 */
61 void download_open(void)
62 {
63 if ((ReportType & REPORT_TYPE_DOWNLOADS) == 0) {
64 if (debugz>=LogLevel_Process) debugaz(_("Download report not produced as it is not requested\n"));
65 return;
66 }
67 if (Privacy) {
68 if (debugz>=LogLevel_Process) debugaz(_("Download report not produced because privacy option is active\n"));
69 return;
70 }
71
72 snprintf(download_unsort,sizeof(download_unsort),"%s/download.int_unsort",tmp);
73 if ((fp_download=MY_FOPEN(download_unsort,"w"))==NULL) {
74 debuga(_("(log) Cannot open file %s: %s\n"),download_unsort,strerror(errno));
75 exit(EXIT_FAILURE);
76 }
77 return;
78 }
79
80 /*!
81 Write one entry in the unsorted downloaded file provided that it is required.
82
83 \param log_entry The entry to write into the log file.
84 \param url The URL of the downloaded file.
85 */
86 void download_write(const struct ReadLogStruct *log_entry,const char *url)
87 {
88 char date[80];
89
90 if (fp_download && strstr(log_entry->HttpCode,"DENIED") == 0) {
91 strftime(date,sizeof(date),"%d/%m/%Y\t%H:%M:%S",&log_entry->EntryTime);
92 fprintf(fp_download,"%s\t%s\t%s\t%s\n",date,log_entry->User,log_entry->Ip,url);
93 download_exists=true;
94 }
95 }
96
97 /*!
98 Close the file opened by denied_open().
99 */
100 void download_close(void)
101 {
102 if (fp_download)
103 {
104 if (fclose(fp_download)==EOF) {
105 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
106 exit(EXIT_FAILURE);
107 }
108 fp_download=NULL;
109 }
110 }
111
112 /*!
113 Tell the caller if a download report exists.
114
115 \return \c True if the report is available or \c false if no report
116 was generated.
117 */
118 bool is_download(void)
119 {
120 return(download_exists);
121 }
122
123 /*!
124 Sort the raw log file with the downloaded files.
125
126 \param report_in The name of the file where to store the sorted entries.
127
128 The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
129 date, the time and the URL.
130 */
131 static void download_sort(const char *report_in)
132 {
133 int clen;
134 char csort[MAXLEN];
135 int cstatus;
136
137 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s\"",
138 tmp, report_in, download_unsort);
139 if (clen>=sizeof(csort)) {
140 debuga(_("Path too long to sort the file: %s\n"),download_unsort);
141 exit(EXIT_FAILURE);
142 }
143 cstatus=system(csort);
144 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
145 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
146 debuga(_("sort command: %s\n"),csort);
147 exit(EXIT_FAILURE);
148 }
149 if (!KeepTempLog) {
150 if (unlink(download_unsort)) {
151 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
152 exit(EXIT_FAILURE);
153 }
154 download_unsort[0]='\0';
155 }
156 }
157
158 /*!
159 Generate the report of the downloaded files. The list of the suffixes to take into account
160 is set with set_download_suffix().
161 */
162 void download_report(void)
163 {
164 FILE *fp_in = NULL, *fp_ou = NULL;
165
166 char *buf;
167 char *url;
168 char report_in[MAXLEN];
169 char report[MAXLEN];
170 char ip[MAXLEN];
171 char oip[MAXLEN];
172 char user[MAXLEN];
173 char ouser[MAXLEN];
174 char ouser2[MAXLEN];
175 char data[15];
176 char hora[15];
177 int z=0;
178 int count=0;
179 int i;
180 int day,month,year;
181 bool new_user;
182 struct getwordstruct gwarea;
183 longline line;
184 struct userinfostruct *uinfo;
185 struct tm t;
186
187 if (!download_exists) {
188 if (!KeepTempLog && download_unsort[0]!='\0' && unlink(download_unsort))
189 debuga(_("Cannot delete \"%s\": %s\n"),download_unsort,strerror(errno));
190 download_unsort[0]='\0';
191 if (debugz>=LogLevel_Process) debugaz(_("No downloaded files to report\n"));
192 return;
193 }
194
195 if (debugz>=LogLevel_Process)
196 debuga(_("Creating download report...\n"));
197 ouser[0]='\0';
198 ouser2[0]='\0';
199
200 // sort the raw file
201 snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
202 download_sort(report_in);
203
204 // produce the report.
205 snprintf(report,sizeof(report),"%s/download.html",outdirname);
206
207 if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
208 debuga(_("(download) Cannot open log file %s: %s\n"),report_in,strerror(errno));
209 exit(EXIT_FAILURE);
210 }
211
212 if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
213 debuga(_("(download) Cannot open log file %s: %s\n"),report,strerror(errno));
214 exit(EXIT_FAILURE);
215 }
216
217 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
218 fputs("<tr><td class=\"header_c\">",fp_ou);
219 fprintf(fp_ou,_("Period: %s"),period.html);
220 fputs("</td></tr>\n",fp_ou);
221 fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
222 close_html_header(fp_ou);
223
224 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
225 fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
226
227 if ((line=longline_create())==NULL) {
228 debuga(_("Not enough memory to read the downloaded files\n"));
229 exit(EXIT_FAILURE);
230 }
231
232 while((buf=longline_read(fp_in,line))!=NULL) {
233 getword_start(&gwarea,buf);
234 if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
235 getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
236 debuga(_("There is a broken record or garbage in file %s\n"),report_in);
237 exit(EXIT_FAILURE);
238 }
239 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
240 debuga(_("There is a broken url in file %s\n"),report_in);
241 exit(EXIT_FAILURE);
242 }
243 if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
244 computedate(year,month,day,&t);
245 strftime(data,sizeof(data),"%x",&t);
246
247 uinfo=userinfo_find_from_id(user);
248 if (!uinfo) {
249 debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
250 exit(EXIT_FAILURE);
251 }
252 new_user=false;
253 if(!z) {
254 strcpy(ouser,user);
255 strcpy(oip,ip);
256 z++;
257 new_user=true;
258 } else {
259 if(strcmp(ouser,user) != 0) {
260 strcpy(ouser,user);
261 new_user=true;
262 }
263 if(strcmp(oip,ip) != 0) {
264 strcpy(oip,ip);
265 new_user=true;
266 }
267 }
268
269 if(DownloadReportLimit) {
270 if(strcmp(ouser2,uinfo->label) == 0) {
271 count++;
272 } else {
273 count=1;
274 strcpy(ouser2,uinfo->label);
275 }
276 if(count >= DownloadReportLimit)
277 continue;
278 }
279
280 for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;
281
282 fputs("<tr>",fp_ou);
283 if (new_user) {
284 if (uinfo->topuser)
285 fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
286 else
287 fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
288 } else
289 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
290 fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
291 if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
292 fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
293 output_html_url(fp_ou,url);
294 fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
295 }
296 output_html_link(fp_ou,url,100);
297 fputs("</td></tr>\n",fp_ou);
298 }
299 fclose(fp_in);
300 longline_destroy(&line);
301
302 fputs("</table></div>\n",fp_ou);
303 if (write_html_trailer(fp_ou)<0)
304 debuga(_("Write error in file %s\n"),report);
305 if (fclose(fp_ou)==EOF) {
306 debuga(_("Write error in %s: %s\n"),report,strerror(errno));
307 exit(EXIT_FAILURE);
308 }
309
310 if (!KeepTempLog && unlink(report_in)) {
311 debuga(_("Cannot delete \"%s\": %s\n"),report_in,strerror(errno));
312 exit(EXIT_FAILURE);
313 }
314
315 return;
316 }
317
318 /*!
319 Free the memory allocated by set_download_suffix().
320 */
321 void free_download(void)
322 {
323 if (DownloadSuffix) {
324 free(DownloadSuffix);
325 DownloadSuffix=NULL;
326 }
327 if (DownloadSuffixIndex) {
328 free(DownloadSuffixIndex);
329 DownloadSuffixIndex=NULL;
330 }
331 NDownloadSuffix=0;
332 }
333
334 /*!
335 Set the list of the suffixes corresponding to the download of files you want to detect with
336 is_download_suffix(). The list is sorted to make the search faster.
337
338 \param list A comma separated list of the suffixes to set in ::DownloadSuffix.
339
340 \note The memory allocated by this function must be freed by free_download().
341 */
342 void set_download_suffix(const char *list)
343 {
344 char *str;
345 int i, j, k;
346 int cmp;
347
348 free_download();
349
350 DownloadSuffix=strdup(list);
351 if (!DownloadSuffix) {
352 debuga(_("Download suffix list too long\n"));
353 exit(EXIT_FAILURE);
354 }
355 j = 1;
356 for (i=0 ; list[i] ; i++)
357 if (list[i] == ',') j++;
358 DownloadSuffixIndex=malloc(j*sizeof(char *));
359 if (!DownloadSuffixIndex) {
360 debuga(_("Too many download suffixes\n"));
361 exit(EXIT_FAILURE);
362 }
363
364 str = DownloadSuffix;
365 for (i=0 ; DownloadSuffix[i] ; i++) {
366 if (DownloadSuffix[i] == ',') {
367 DownloadSuffix[i] = '\0';
368 if (*str) {
369 cmp = -1;
370 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
371 if (cmp != 0) {
372 for (k=NDownloadSuffix ; k>j ; k--)
373 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
374 NDownloadSuffix++;
375 DownloadSuffixIndex[j]=str;
376 }
377 }
378 str=DownloadSuffix+i+1;
379 }
380 }
381
382 if (*str) {
383 cmp = -1;
384 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
385 if (cmp != 0) {
386 for (k=NDownloadSuffix ; k>j ; k--)
387 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
388 NDownloadSuffix++;
389 DownloadSuffixIndex[j]=str;
390 }
391 }
392 }
393
394 /*!
395 Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
396 URL with a maximum of 9 characters and compare it to the list of the download suffix in
397 ::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
398 of a file.
399
400 \param url The URL to test.
401
402 \retval 1 The URL matches a suffix of a download.
403 \retval 0 The URL is not a known download.
404
405 \note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
406 that ends with the file name can be detected.
407
408 \note A URL embedding another web site's address ending by .com at the end of the URL will match the download
409 extension com if it is defined in the ::DownloadSuffix.
410 */
411 bool is_download_suffix(const char *url)
412 {
413 int urllen;
414 int i;
415 int down, up, center;
416 const char *suffix;
417 int cmp;
418 const int max_suffix=10;
419
420 if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);
421
422 urllen=strlen(url)-1;
423 if (urllen<=0) return(false);
424 if (url[urllen] == '.') return(false); //reject a single trailing dot
425 for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
426 if (i>=urllen) return(false); // url is a hostname without any path or file to download
427
428 for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
429 if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
430 if (i>max_suffix || i>=urllen) return(false);
431
432 suffix=url+urllen-i+1;
433 down=0;
434 up=NDownloadSuffix-1;
435 while (down<=up) {
436 center=(down+up)/2;
437 cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
438 if (cmp == 0) return(true);
439 if (cmp < 0)
440 up = center-1;
441 else
442 down = center+1;
443 }
444 return(false);
445 }
446
447 /*!
448 Remove any temporary file left by the download module.
449 */
450 void download_cleanup(void)
451 {
452 if (fp_download) {
453 if (fclose(fp_download)==EOF) {
454 debuga(_("Write error in %s: %s\n"),download_unsort,strerror(errno));
455 exit(EXIT_FAILURE);
456 }
457 fp_download=NULL;
458 }
459 if (download_unsort[0]) {
460 if (unlink(download_unsort)==-1)
461 debuga(_("Failed to delete %s: %s\n"),download_unsort,strerror(errno));
462 }
463 }