]> git.ipfire.org Git - thirdparty/sarg.git/blob - download.c
Merge commit '24eb624ea44bf4c82c602ce'
[thirdparty/sarg.git] / download.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 /*!
31 The buffer to store the list of the suffixes to take into account when generating
32 the report of the downloaded files. The suffixes in the list are separated by the ASCII
33 null.
34 */
35 /*@null@*/static char *DownloadSuffix=NULL;
36
37 /*!
The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically
to speed up the search.
40 */
41 /*@null@*/static char **DownloadSuffixIndex=NULL;
42
43 /*!
44 The number of suffixes in ::DownloadSuffixIndex.
45 */
46 static int NDownloadSuffix=0;
47
48 /*!
49 Sort the raw log file with the downloaded files.
50
51 \param report_in The name of the file where to store the sorted entries.
52
53 The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
54 date, the time and the URL.
55 */
56 static void download_sort(const char *report_in)
57 {
58 int clen;
59 char csort[MAXLEN];
60 int cstatus;
61
62 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"",
63 tmp, report_in, tmp);
64 if (clen>=sizeof(csort)) {
65 debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp);
66 exit(EXIT_FAILURE);
67 }
68 cstatus=system(csort);
69 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
70 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
71 debuga(_("sort command: %s\n"),csort);
72 exit(EXIT_FAILURE);
73 }
74 if (snprintf(csort,sizeof(csort),"%s/download.int_unsort",tmp)>=sizeof(csort)) {
75 debuga(_("Path too long for %s/download.int_unsort\n"),tmp);
76 exit(EXIT_FAILURE);
77 }
78 if (unlink(csort)) {
79 debuga(_("Cannot delete %s - %s\n"),csort,strerror(errno));
80 exit(EXIT_FAILURE);
81 }
82 }
83
84 /*!
85 Generate the report of the downloaded files. The list of the suffixes to take into account
86 is set with set_download_suffix().
87 */
void download_report(void)
{
	FILE *fp_in = NULL, *fp_ou = NULL;

	char *buf;
	char *url;
	char report_in[MAXLEN];    /* sorted intermediate log produced by download_sort() */
	char report[MAXLEN];       /* output HTML report */
	char ip[MAXLEN];
	char oip[MAXLEN];          /* IP of the previous record, to detect a change */
	char user[MAXLEN];
	char ouser[MAXLEN];        /* user of the previous record, to detect a change */
	char ouser2[MAXLEN];       /* user of the previous record, for the report limit */
	char data[15];
	char hora[15];
	int z=0;                   /* zero only for the first record */
	int count=0;               /* downloads printed so far for the current user */
	int i;
	int day,month,year;
	bool new_user;
	struct getwordstruct gwarea;
	longline line;
	struct userinfostruct *uinfo;
	struct tm t;

	ouser[0]='\0';
	ouser2[0]='\0';

	// sort the raw file
	snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
	download_sort(report_in);
	if(access(report_in, R_OK) != 0) {
		// no downloads were logged so there is nothing to report
		if (debugz) debugaz(_("Downloaded files report not generated as it is empty\n"));
		return;
	}

	// produce the report.
	snprintf(report,sizeof(report),"%s/download.html",outdirname);

	if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
		debuga(_("(download) Cannot open log file %s\n"),report_in);
		exit(EXIT_FAILURE);
	}

	if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
		debuga(_("(download) Cannot open log file %s\n"),report);
		exit(EXIT_FAILURE);
	}

	// page header and table header row
	write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
	fputs("<tr><td class=\"header_c\">",fp_ou);
	fprintf(fp_ou,_("Period: %s"),period.html);
	fputs("</td></tr>\n",fp_ou);
	fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
	close_html_header(fp_ou);

	fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
	fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));

	if ((line=longline_create())==NULL) {
		debuga(_("Not enough memory to read the downloaded files\n"));
		exit(EXIT_FAILURE);
	}

	while((buf=longline_read(fp_in,line))!=NULL) {
		// each record is tab separated: date, time, user ID, IP and URL
		getword_start(&gwarea,buf);
		if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
		    getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
			debuga(_("There is a broken record or garbage in file %s\n"),report_in);
			exit(EXIT_FAILURE);
		}
		if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
			debuga(_("There is a broken url in file %s\n"),report_in);
			exit(EXIT_FAILURE);
		}
		// reformat the date according to the locale
		if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
		computedate(year,month,day,&t);
		strftime(data,sizeof(data),"%x",&t);

		uinfo=userinfo_find_from_id(user);
		if (!uinfo) {
			debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
			exit(EXIT_FAILURE);
		}
		// the user and IP cells are only written when they change from the
		// previous row (the file is sorted by user) to avoid repetition
		new_user=false;
		if(!z) {
			strcpy(ouser,user);
			strcpy(oip,ip);
			z++;
			new_user=true;
		} else {
			if(strcmp(ouser,user) != 0) {
				strcpy(ouser,user);
				new_user=true;
			}
			if(strcmp(oip,ip) != 0) {
				strcpy(oip,ip);
				new_user=true;
			}
		}

		// enforce the optional cap on the number of rows per user
		if(DownloadReportLimit) {
			if(strcmp(ouser2,uinfo->label) == 0) {
				count++;
			} else {
				count=1;
				strcpy(ouser2,uinfo->label);
			}
			if(count >= DownloadReportLimit)
				continue;
		}

		// strip trailing control characters from the URL
		for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;

		fputs("<tr>",fp_ou);
		if (new_user) {
			// link to the user's own report page when one was generated
			if (uinfo->topuser)
				fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
			else
				fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
		} else
			fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
		fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
		// optional link to a CGI that blocks the site through squidGuard
		if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
			fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
			output_html_url(fp_ou,url);
			fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
		}
		output_html_link(fp_ou,url,100);
		fputs("</td></tr>\n",fp_ou);
	}
	fclose(fp_in);
	longline_destroy(&line);

	fputs("</table></div>\n",fp_ou);
	if (write_html_trailer(fp_ou)<0)
		debuga(_("Write error in file %s\n"),report);
	if (fclose(fp_ou)==EOF)
		debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));

	// the sorted intermediate file is no longer needed
	if (unlink(report_in)) {
		debuga(_("Cannot delete %s - %s\n"),report_in,strerror(errno));
		exit(EXIT_FAILURE);
	}

	return;
}
235
236 /*!
237 Free the memory allocated by set_download_suffix().
238 */
239 void free_download(void)
240 {
241 if (DownloadSuffix) {
242 free(DownloadSuffix);
243 DownloadSuffix=NULL;
244 }
245 if (DownloadSuffixIndex) {
246 free(DownloadSuffixIndex);
247 DownloadSuffixIndex=NULL;
248 }
249 NDownloadSuffix=0;
250 }
251
252 /*!
253 Set the list of the suffixes corresponding to the download of files you want to detect with
254 is_download_suffix(). The list is sorted to make the search faster.
255
256 \param list A comma separated list of the suffixes to set in ::DownloadSuffix.
257
258 \note The memory allocated by this function must be freed by free_download().
259 */
260 void set_download_suffix(const char *list)
261 {
262 char *str;
263 int i, j, k;
264 int cmp;
265
266 free_download();
267
268 DownloadSuffix=strdup(list);
269 if (!DownloadSuffix) {
270 debuga(_("Download suffix list too long\n"));
271 exit(EXIT_FAILURE);
272 }
273 j = 1;
274 for (i=0 ; list[i] ; i++)
275 if (list[i] == ',') j++;
276 DownloadSuffixIndex=malloc(j*sizeof(char *));
277 if (!DownloadSuffixIndex) {
278 debuga(_("Too many download suffixes\n"));
279 exit(EXIT_FAILURE);
280 }
281
282 str = DownloadSuffix;
283 for (i=0 ; DownloadSuffix[i] ; i++) {
284 if (DownloadSuffix[i] == ',') {
285 DownloadSuffix[i] = '\0';
286 if (*str) {
287 cmp = -1;
288 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
289 if (cmp != 0) {
290 for (k=NDownloadSuffix ; k>j ; k--)
291 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
292 NDownloadSuffix++;
293 DownloadSuffixIndex[j]=str;
294 }
295 }
296 str=DownloadSuffix+i+1;
297 }
298 }
299
300 if (*str) {
301 cmp = -1;
302 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
303 if (cmp != 0) {
304 for (k=NDownloadSuffix ; k>j ; k--)
305 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
306 NDownloadSuffix++;
307 DownloadSuffixIndex[j]=str;
308 }
309 }
310 }
311
312 /*!
313 Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
314 URL with a maximum of 9 characters and compare it to the list of the download suffix in
315 ::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
316 of a file.
317
318 \param url The URL to test.
319
320 \retval 1 The URL matches a suffix of a download.
321 \retval 0 The URL is not a known download.
322
323 \note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
324 that ends with the file name can be detected.
325
326 \note A URL embedding another web site's address ending by .com at the end of the URL will match the download
327 extension com if it is defined in the ::DownloadSuffix.
328 */
329 bool is_download_suffix(const char *url)
330 {
331 int urllen;
332 int i;
333 int down, up, center;
334 const char *suffix;
335 int cmp;
336 const int max_suffix=10;
337
338 if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);
339
340 urllen=strlen(url)-1;
341 if (urllen<=0) return(false);
342 if (url[urllen] == '.') return(false); //reject a single trailing dot
343 for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
344 if (i>=urllen) return(false); // url is a hostname without any path or file to download
345
346 for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
347 if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
348 if (i>max_suffix || i>=urllen) return(false);
349
350 suffix=url+urllen-i+1;
351 down=0;
352 up=NDownloadSuffix-1;
353 while (down<=up) {
354 center=(down+up)/2;
355 cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
356 if (cmp == 0) return(true);
357 if (cmp < 0)
358 up = center-1;
359 else
360 down = center+1;
361 }
362 return(false);
363 }
364