]> git.ipfire.org Git - thirdparty/sarg.git/blob - download.c
Fix the documentation for ntlm_user_format
[thirdparty/sarg.git] / download.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 /*!
31 The buffer to store the list of the suffixes to take into account when generating
32 the report of the downloaded files. The suffixes in the list are separated by the ASCII
33 null.
34 */
35 /*@null@*/static char *DownloadSuffix=NULL;
36
/*!
The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically
to speed up the search.
*/
41 /*@null@*/static char **DownloadSuffixIndex=NULL;
42
43 /*!
44 The number of suffixes in ::DownloadSuffixIndex.
45 */
46 static int NDownloadSuffix=0;
47
48 /*!
49 Sort the raw log file with the downloaded files.
50
51 \param report_in The name of the file where to store the sorted entries.
52
53 The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
54 date, the time and the URL.
55 */
56 static void download_sort(const char *report_in)
57 {
58 int clen;
59 char csort[MAXLEN];
60 int cstatus;
61
62 clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"",
63 tmp, report_in, tmp);
64 if (clen>=sizeof(csort)) {
65 debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp);
66 exit(EXIT_FAILURE);
67 }
68 cstatus=system(csort);
69 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
70 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
71 debuga(_("sort command: %s\n"),csort);
72 exit(EXIT_FAILURE);
73 }
74 if (snprintf(csort,sizeof(csort),"%s/download.int_unsort",tmp)>=sizeof(csort)) {
75 debuga(_("Path too long for %s/download.int_unsort\n"),tmp);
76 exit(EXIT_FAILURE);
77 }
78 if (unlink(csort)) {
79 debuga(_("Cannot delete %s - %s\n"),csort,strerror(errno));
80 exit(EXIT_FAILURE);
81 }
82 }
83
/*!
Generate the report of the downloaded files. The list of the suffixes to take into account
is set with set_download_suffix().
*/
void download_report(void)
{
	FILE *fp_in = NULL, *fp_ou = NULL;

	char *buf;
	char *url;
	char report_in[MAXLEN];
	char report[MAXLEN];
	char ip[MAXLEN];
	char oip[MAXLEN];   /* IP seen on the previous record, to detect a change */
	char user[MAXLEN];
	char ouser[MAXLEN]; /* user ID seen on the previous record, to detect a change */
	char ouser2[MAXLEN]; /* user label used to count the records against DownloadReportLimit */
	char data[15];
	char hora[15];
	int z=0;        /* false until the first record has been processed */
	int count=0;    /* number of records already reported for the current user */
	int i;
	int day,month,year;
	bool new_user;
	struct getwordstruct gwarea;
	longline line;
	struct userinfostruct *uinfo;
	struct tm t;

	/* nothing to do if no download was recorded while parsing the access log */
	if (!ndownload) {
		if (debugz) debugaz(_("No downloaded files to report\n"));
		return;
	}

	ouser[0]='\0';
	ouser2[0]='\0';

	// sort the raw file
	snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp);
	download_sort(report_in);
	if(access(report_in, R_OK) != 0) {
		debugaz(_("Sorted file doesn't exist (to produce the download report)\n"));
		exit(EXIT_FAILURE);
	}

	// produce the report.
	snprintf(report,sizeof(report),"%s/download.html",outdirname);

	if((fp_in=MY_FOPEN(report_in,"r"))==NULL) {
		debuga(_("(download) Cannot open log file %s\n"),report_in);
		exit(EXIT_FAILURE);
	}

	if((fp_ou=MY_FOPEN(report,"w"))==NULL) {
		debuga(_("(download) Cannot open log file %s\n"),report);
		exit(EXIT_FAILURE);
	}

	/* page header and the table header row of the report */
	write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE);
	fputs("<tr><td class=\"header_c\">",fp_ou);
	fprintf(fp_ou,_("Period: %s"),period.html);
	fputs("</td></tr>\n",fp_ou);
	fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
	close_html_header(fp_ou);

	fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou);
	fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));

	if ((line=longline_create())==NULL) {
		debuga(_("Not enough memory to read the downloaded files\n"));
		exit(EXIT_FAILURE);
	}

	/* one tab separated record per line: date, time, user ID, IP, URL */
	while((buf=longline_read(fp_in,line))!=NULL) {
		getword_start(&gwarea,buf);
		if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 ||
		    getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) {
			debuga(_("There is a broken record or garbage in file %s\n"),report_in);
			exit(EXIT_FAILURE);
		}
		if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
			debuga(_("There is a broken url in file %s\n"),report_in);
			exit(EXIT_FAILURE);
		}
		/* silently skip a record whose date cannot be parsed */
		if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue;
		computedate(year,month,day,&t);
		/* reformat the date according to the current locale */
		strftime(data,sizeof(data),"%x",&t);

		uinfo=userinfo_find_from_id(user);
		if (!uinfo) {
			debuga(_("Unknown user ID %s in file %s\n"),user,report_in);
			exit(EXIT_FAILURE);
		}
		/* the user/IP columns are only written when they change from the
		   previous row, producing a visually grouped report */
		new_user=false;
		if(!z) {
			strcpy(ouser,user);
			strcpy(oip,ip);
			z++;
			new_user=true;
		} else {
			if(strcmp(ouser,user) != 0) {
				strcpy(ouser,user);
				new_user=true;
			}
			if(strcmp(oip,ip) != 0) {
				strcpy(oip,ip);
				new_user=true;
			}
		}

		/* cap the number of rows per user when a limit is configured */
		if(DownloadReportLimit) {
			if(strcmp(ouser2,uinfo->label) == 0) {
				count++;
			} else {
				count=1;
				strcpy(ouser2,uinfo->label);
			}
			if(count >= DownloadReportLimit)
				continue;
		}

		/* strip trailing control characters (e.g. CR/LF) from the URL */
		for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0;

		fputs("<tr>",fp_ou);
		if (new_user) {
			/* link the user ID to its individual report page when one exists */
			if (uinfo->topuser)
				fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip);
			else
				fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip);
		} else
			fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou);
		fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora);
		/* optional "block this site" icon linking to the configured CGI;
		   skipped for aliased URLs (presumably ALIAS_PREFIX marks those — confirm) */
		if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) {
			fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
			output_html_url(fp_ou,url);
			fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a>&nbsp;",ImageFile);
		}
		output_html_link(fp_ou,url,100);
		fputs("</td></tr>\n",fp_ou);
	}
	fclose(fp_in);
	longline_destroy(&line);

	fputs("</table></div>\n",fp_ou);
	if (write_html_trailer(fp_ou)<0)
		debuga(_("Write error in file %s\n"),report);
	if (fclose(fp_ou)==EOF)
		debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));

	/* the sorted intermediate file is no longer needed */
	if (unlink(report_in)) {
		debuga(_("Cannot delete %s - %s\n"),report_in,strerror(errno));
		exit(EXIT_FAILURE);
	}

	return;
}
240
241 /*!
242 Free the memory allocated by set_download_suffix().
243 */
244 void free_download(void)
245 {
246 if (DownloadSuffix) {
247 free(DownloadSuffix);
248 DownloadSuffix=NULL;
249 }
250 if (DownloadSuffixIndex) {
251 free(DownloadSuffixIndex);
252 DownloadSuffixIndex=NULL;
253 }
254 NDownloadSuffix=0;
255 }
256
257 /*!
258 Set the list of the suffixes corresponding to the download of files you want to detect with
259 is_download_suffix(). The list is sorted to make the search faster.
260
261 \param list A comma separated list of the suffixes to set in ::DownloadSuffix.
262
263 \note The memory allocated by this function must be freed by free_download().
264 */
265 void set_download_suffix(const char *list)
266 {
267 char *str;
268 int i, j, k;
269 int cmp;
270
271 free_download();
272
273 DownloadSuffix=strdup(list);
274 if (!DownloadSuffix) {
275 debuga(_("Download suffix list too long\n"));
276 exit(EXIT_FAILURE);
277 }
278 j = 1;
279 for (i=0 ; list[i] ; i++)
280 if (list[i] == ',') j++;
281 DownloadSuffixIndex=malloc(j*sizeof(char *));
282 if (!DownloadSuffixIndex) {
283 debuga(_("Too many download suffixes\n"));
284 exit(EXIT_FAILURE);
285 }
286
287 str = DownloadSuffix;
288 for (i=0 ; DownloadSuffix[i] ; i++) {
289 if (DownloadSuffix[i] == ',') {
290 DownloadSuffix[i] = '\0';
291 if (*str) {
292 cmp = -1;
293 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
294 if (cmp != 0) {
295 for (k=NDownloadSuffix ; k>j ; k--)
296 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
297 NDownloadSuffix++;
298 DownloadSuffixIndex[j]=str;
299 }
300 }
301 str=DownloadSuffix+i+1;
302 }
303 }
304
305 if (*str) {
306 cmp = -1;
307 for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++);
308 if (cmp != 0) {
309 for (k=NDownloadSuffix ; k>j ; k--)
310 DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1];
311 NDownloadSuffix++;
312 DownloadSuffixIndex[j]=str;
313 }
314 }
315 }
316
/*!
Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
URL with a maximum of 9 characters and compare it to the list of the download suffix in
::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
of a file.

\param url The URL to test.

\retval 1 The URL matches a suffix of a download.
\retval 0 The URL is not a known download.

\note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
that ends with the file name can be detected.

\note A URL embedding another web site's address ending by .com at the end of the URL will match the download
extension com if it is defined in the ::DownloadSuffix.
*/
bool is_download_suffix(const char *url)
{
	int urllen;
	int i;
	int down, up, center;
	const char *suffix;
	int cmp;
	const int max_suffix=10; /* NOTE(review): the doc above says 9 characters but the checks below accept up to 10 characters after the dot - confirm the intended limit */

	/* no suffix list configured: nothing can match */
	if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false);

	urllen=strlen(url)-1; /* index of the last character of the URL */
	if (urllen<=0) return(false);
	if (url[urllen] == '.') return(false); //reject a single trailing dot
	/* skip the scheme and host: "//" (as in "http://") is not a path separator */
	for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++);
	if (i>=urllen) return(false); // url is a hostname without any path or file to download

	/* walk backward from the end looking for the dot starting the extension;
	   a '/' or '?' before the dot means the last path component has no extension */
	for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++)
		if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false);
	if (i>max_suffix || i>=urllen) return(false); /* extension too long or no dot found */

	/* binary search of the extension (text after the dot) in the sorted index */
	suffix=url+urllen-i+1;
	down=0;
	up=NDownloadSuffix-1;
	while (down<=up) {
		center=(down+up)/2;
		cmp=strcasecmp(suffix,DownloadSuffixIndex[center]);
		if (cmp == 0) return(true);
		if (cmp < 0)
			up = center-1;
		else
			down = center+1;
	}
	return(false);
}
369