]>
Commit | Line | Data |
---|---|---|
25697a35 | 1 | /* |
94ff9470 | 2 | * SARG Squid Analysis Report Generator http://sarg.sourceforge.net |
61d965f3 | 3 | * 1998, 2012 |
25697a35 GS |
4 | * |
5 | * SARG donations: | |
6 | * please look at http://sarg.sourceforge.net/donations.php | |
1164c474 FM |
7 | * Support: |
8 | * http://sourceforge.net/projects/sarg/forums/forum/363374 | |
25697a35 GS |
9 | * --------------------------------------------------------------------- |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License as published by | |
13 | * the Free Software Foundation; either version 2 of the License, or | |
14 | * (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | * GNU General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public License | |
22 | * along with this program; if not, write to the Free Software | |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
24 | * | |
25 | */ | |
26 | ||
27 | #include "include/conf.h" | |
5f3cfd1d | 28 | #include "include/defs.h" |
25697a35 | 29 | |
b2fa3eb6 FM |
30 | /*! |
31 | The buffer to store the list of the suffixes to take into account when generating | |
32 | the report of the downloaded files. The suffixes in the list are separated by the ASCII | |
33 | null. | |
34 | */ | |
2824ec9b | 35 | /*@null@*/static char *DownloadSuffix=NULL; |
b2fa3eb6 FM |
36 | |
37 | /*! | |
38 | The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically | |
39 | (ignoring case) to speed up the search. | |
40 | */ | |
2824ec9b | 41 | /*@null@*/static char **DownloadSuffixIndex=NULL; |
b2fa3eb6 FM |
42 | |
43 | /*! | |
44 | The number of suffixes in ::DownloadSuffixIndex. | |
45 | */ | |
6e792ade FM |
46 | static int NDownloadSuffix=0; |
47 | ||
b2fa3eb6 FM |
48 | /*! |
49 | Sort the raw log file with the downloaded files. | |
50 | ||
51 | \param report_in The name of the file where to store the sorted entries. | |
73b57f55 FM |
52 | |
53 | The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the | |
54 | date, the time and the URL. | |
b2fa3eb6 FM |
55 | */ |
56 | static void download_sort(const char *report_in) | |
57 | { | |
58 | int clen; | |
59 | char csort[MAXLEN]; | |
60 | int cstatus; | |
61 | ||
c98d6a0f | 62 | clen=snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"", |
b2fa3eb6 FM |
63 | tmp, report_in, tmp); |
64 | if (clen>=sizeof(csort)) { | |
c98d6a0f | 65 | debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp); |
b2fa3eb6 FM |
66 | exit(EXIT_FAILURE); |
67 | } | |
68 | cstatus=system(csort); | |
69 | if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { | |
70 | debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus)); | |
71 | debuga(_("sort command: %s\n"),csort); | |
72 | exit(EXIT_FAILURE); | |
73 | } | |
c98d6a0f FM |
74 | if (snprintf(csort,sizeof(csort),"%s/download.int_unsort",tmp)>=sizeof(csort)) { |
75 | debuga(_("Path too long for %s/download.int_unsort\n"),tmp); | |
b2fa3eb6 FM |
76 | exit(EXIT_FAILURE); |
77 | } | |
11767c6a FM |
78 | if (!KeepTempLog && unlink(csort)) { |
79 | debuga(_("Cannot delete \"%s\": %s\n"),csort,strerror(errno)); | |
b2fa3eb6 FM |
80 | exit(EXIT_FAILURE); |
81 | } | |
82 | } | |
83 | ||
84 | /*! | |
85 | Generate the report of the downloaded files. The list of the suffixes to take into account | |
86 | is set with set_download_suffix(). | |
87 | */ | |
32e71fa4 | 88 | void download_report(void) |
25697a35 | 89 | { |
9bd92830 FM |
90 | FILE *fp_in = NULL, *fp_ou = NULL; |
91 | ||
92 | char *buf; | |
93 | char *url; | |
94 | char report_in[MAXLEN]; | |
95 | char report[MAXLEN]; | |
96 | char ip[MAXLEN]; | |
97 | char oip[MAXLEN]; | |
98 | char user[MAXLEN]; | |
99 | char ouser[MAXLEN]; | |
100 | char ouser2[MAXLEN]; | |
101 | char data[15]; | |
102 | char hora[15]; | |
103 | int z=0; | |
104 | int count=0; | |
105 | int i; | |
106 | int day,month,year; | |
107 | bool new_user; | |
108 | struct getwordstruct gwarea; | |
109 | longline line; | |
110 | struct userinfostruct *uinfo; | |
111 | struct tm t; | |
112 | ||
5589b847 FM |
113 | if (!ndownload) { |
114 | if (debugz) debugaz(_("No downloaded files to report\n")); | |
115 | return; | |
116 | } | |
117 | ||
9bd92830 FM |
118 | ouser[0]='\0'; |
119 | ouser2[0]='\0'; | |
120 | ||
b2fa3eb6 | 121 | // sort the raw file |
c98d6a0f | 122 | snprintf(report_in,sizeof(report_in),"%s/download.int_log",tmp); |
b2fa3eb6 | 123 | download_sort(report_in); |
9f93fec3 | 124 | if(access(report_in, R_OK) != 0) { |
5589b847 FM |
125 | debugaz(_("Sorted file doesn't exist (to produce the download report)\n")); |
126 | exit(EXIT_FAILURE); | |
9f93fec3 | 127 | } |
9bd92830 | 128 | |
b2fa3eb6 | 129 | // produce the report. |
9bd92830 FM |
130 | snprintf(report,sizeof(report),"%s/download.html",outdirname); |
131 | ||
132 | if((fp_in=MY_FOPEN(report_in,"r"))==NULL) { | |
133 | debuga(_("(download) Cannot open log file %s\n"),report_in); | |
134 | exit(EXIT_FAILURE); | |
135 | } | |
136 | ||
137 | if((fp_ou=MY_FOPEN(report,"w"))==NULL) { | |
138 | debuga(_("(download) Cannot open log file %s\n"),report); | |
139 | exit(EXIT_FAILURE); | |
140 | } | |
141 | ||
142 | write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Downloads"),HTML_JS_NONE); | |
143 | fputs("<tr><td class=\"header_c\">",fp_ou); | |
144 | fprintf(fp_ou,_("Period: %s"),period.html); | |
145 | fputs("</td></tr>\n",fp_ou); | |
146 | fprintf(fp_ou,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads")); | |
147 | close_html_header(fp_ou); | |
148 | ||
149 | fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou); | |
150 | fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE")); | |
151 | ||
152 | if ((line=longline_create())==NULL) { | |
153 | debuga(_("Not enough memory to read the downloaded files\n")); | |
154 | exit(EXIT_FAILURE); | |
155 | } | |
156 | ||
157 | while((buf=longline_read(fp_in,line))!=NULL) { | |
158 | getword_start(&gwarea,buf); | |
159 | if (getword(data,sizeof(data),&gwarea,'\t')<0 || getword(hora,sizeof(hora),&gwarea,'\t')<0 || | |
007905af | 160 | getword(user,sizeof(user),&gwarea,'\t')<0 || getword(ip,sizeof(ip),&gwarea,'\t')<0) { |
9bd92830 FM |
161 | debuga(_("There is a broken record or garbage in file %s\n"),report_in); |
162 | exit(EXIT_FAILURE); | |
163 | } | |
164 | if (getword_ptr(buf,&url,&gwarea,'\t')<0) { | |
165 | debuga(_("There is a broken url in file %s\n"),report_in); | |
166 | exit(EXIT_FAILURE); | |
167 | } | |
168 | if (sscanf(data,"%d/%d/%d",&day,&month,&year)!=3) continue; | |
169 | computedate(year,month,day,&t); | |
170 | strftime(data,sizeof(data),"%x",&t); | |
171 | ||
172 | uinfo=userinfo_find_from_id(user); | |
173 | if (!uinfo) { | |
174 | debuga(_("Unknown user ID %s in file %s\n"),user,report_in); | |
175 | exit(EXIT_FAILURE); | |
176 | } | |
177 | new_user=false; | |
178 | if(!z) { | |
179 | strcpy(ouser,user); | |
180 | strcpy(oip,ip); | |
181 | z++; | |
182 | new_user=true; | |
183 | } else { | |
184 | if(strcmp(ouser,user) != 0) { | |
185 | strcpy(ouser,user); | |
186 | new_user=true; | |
187 | } | |
188 | if(strcmp(oip,ip) != 0) { | |
189 | strcpy(oip,ip); | |
190 | new_user=true; | |
191 | } | |
192 | } | |
193 | ||
194 | if(DownloadReportLimit) { | |
195 | if(strcmp(ouser2,uinfo->label) == 0) { | |
196 | count++; | |
197 | } else { | |
198 | count=1; | |
199 | strcpy(ouser2,uinfo->label); | |
200 | } | |
201 | if(count >= DownloadReportLimit) | |
202 | continue; | |
203 | } | |
204 | ||
205 | for (i=strlen(url)-1 ; i>=0 && (unsigned char)url[i]<' ' ; i--) url[i]=0; | |
206 | ||
207 | fputs("<tr>",fp_ou); | |
5138c1b9 FM |
208 | if (new_user) { |
209 | if (uinfo->topuser) | |
210 | fprintf(fp_ou,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo->filename,uinfo->filename,uinfo->label,ip); | |
211 | else | |
212 | fprintf(fp_ou,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo->label,ip); | |
213 | } else | |
9bd92830 FM |
214 | fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou); |
215 | fprintf(fp_ou,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data,hora); | |
67a93701 | 216 | if(BlockIt[0]!='\0' && url[0]!=ALIAS_PREFIX) { |
9bd92830 FM |
217 | fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt); |
218 | output_html_url(fp_ou,url); | |
219 | fprintf(fp_ou,"\"><img src=\"%s/sarg-squidguard-block.png\"></a> ",ImageFile); | |
220 | } | |
6fa33a32 | 221 | output_html_link(fp_ou,url,100); |
67a93701 | 222 | fputs("</td></tr>\n",fp_ou); |
9bd92830 FM |
223 | } |
224 | fclose(fp_in); | |
225 | longline_destroy(&line); | |
226 | ||
227 | fputs("</table></div>\n",fp_ou); | |
228 | if (write_html_trailer(fp_ou)<0) | |
229 | debuga(_("Write error in file %s\n"),report); | |
230 | if (fclose(fp_ou)==EOF) | |
231 | debuga(_("Failed to close file %s - %s\n"),report,strerror(errno)); | |
232 | ||
11767c6a FM |
233 | if (!KeepTempLog && unlink(report_in)) { |
234 | debuga(_("Cannot delete \"%s\": %s\n"),report_in,strerror(errno)); | |
08f9b029 FM |
235 | exit(EXIT_FAILURE); |
236 | } | |
9bd92830 FM |
237 | |
238 | return; | |
25697a35 | 239 | } |
6e792ade | 240 | |
b2fa3eb6 FM |
241 | /*! |
242 | Free the memory allocated by set_download_suffix(). | |
243 | */ | |
6e792ade FM |
244 | void free_download(void) |
245 | { | |
9bd92830 FM |
246 | if (DownloadSuffix) { |
247 | free(DownloadSuffix); | |
248 | DownloadSuffix=NULL; | |
249 | } | |
250 | if (DownloadSuffixIndex) { | |
251 | free(DownloadSuffixIndex); | |
252 | DownloadSuffixIndex=NULL; | |
253 | } | |
254 | NDownloadSuffix=0; | |
6e792ade FM |
255 | } |
256 | ||
b2fa3eb6 FM |
257 | /*! |
258 | Set the list of the suffixes corresponding to the download of files you want to detect with | |
259 | is_download_suffix(). The list is sorted to make the search faster. | |
260 | ||
261 | \param list A comma separated list of the suffixes to set in ::DownloadSuffix. | |
262 | ||
263 | \note The memory allocated by this function must be freed by free_download(). | |
264 | */ | |
6e792ade FM |
265 | void set_download_suffix(const char *list) |
266 | { | |
9bd92830 FM |
267 | char *str; |
268 | int i, j, k; | |
269 | int cmp; | |
270 | ||
271 | free_download(); | |
272 | ||
273 | DownloadSuffix=strdup(list); | |
274 | if (!DownloadSuffix) { | |
275 | debuga(_("Download suffix list too long\n")); | |
276 | exit(EXIT_FAILURE); | |
277 | } | |
278 | j = 1; | |
279 | for (i=0 ; list[i] ; i++) | |
280 | if (list[i] == ',') j++; | |
281 | DownloadSuffixIndex=malloc(j*sizeof(char *)); | |
282 | if (!DownloadSuffixIndex) { | |
283 | debuga(_("Too many download suffixes\n")); | |
284 | exit(EXIT_FAILURE); | |
285 | } | |
286 | ||
287 | str = DownloadSuffix; | |
288 | for (i=0 ; DownloadSuffix[i] ; i++) { | |
289 | if (DownloadSuffix[i] == ',') { | |
290 | DownloadSuffix[i] = '\0'; | |
291 | if (*str) { | |
292 | cmp = -1; | |
293 | for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++); | |
294 | if (cmp != 0) { | |
295 | for (k=NDownloadSuffix ; k>j ; k--) | |
296 | DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1]; | |
297 | NDownloadSuffix++; | |
298 | DownloadSuffixIndex[j]=str; | |
299 | } | |
300 | } | |
301 | str=DownloadSuffix+i+1; | |
302 | } | |
303 | } | |
304 | ||
305 | if (*str) { | |
306 | cmp = -1; | |
307 | for (j=0 ; j<NDownloadSuffix && (cmp=strcasecmp(str,DownloadSuffixIndex[j]))>0 ; j++); | |
308 | if (cmp != 0) { | |
309 | for (k=NDownloadSuffix ; k>j ; k--) | |
310 | DownloadSuffixIndex[k]=DownloadSuffixIndex[k-1]; | |
311 | NDownloadSuffix++; | |
312 | DownloadSuffixIndex[j]=str; | |
313 | } | |
314 | } | |
6e792ade FM |
315 | } |
316 | ||
b2fa3eb6 FM |
317 | /*! |
318 | Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the | |
319 | URL with a maximum of 9 characters and compare it to the list of the download suffix in | |
320 | ::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download | |
321 | of a file. | |
322 | ||
323 | \param url The URL to test. | |
324 | ||
325 | \retval 1 The URL matches a suffix of a download. | |
326 | \retval 0 The URL is not a known download. | |
327 | ||
328 | \note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests | |
329 | that ends with the file name can be detected. | |
330 | ||
331 | \note A URL embedding another web site's address ending by .com at the end of the URL will match the download | |
332 | extension com if it is defined in the ::DownloadSuffix. | |
333 | */ | |
2824ec9b | 334 | bool is_download_suffix(const char *url) |
6e792ade | 335 | { |
9bd92830 FM |
336 | int urllen; |
337 | int i; | |
338 | int down, up, center; | |
339 | const char *suffix; | |
340 | int cmp; | |
341 | const int max_suffix=10; | |
342 | ||
343 | if (DownloadSuffix == NULL || NDownloadSuffix == 0) return(false); | |
344 | ||
345 | urllen=strlen(url)-1; | |
346 | if (urllen<=0) return(false); | |
347 | if (url[urllen] == '.') return(false); //reject a single trailing dot | |
348 | for (i=0 ; i<urllen && (url[i]!='/' || url[i+1]=='/') && url[i]!='?' ; i++); | |
349 | if (i>=urllen) return(false); // url is a hostname without any path or file to download | |
350 | ||
351 | for (i=0 ; i<=max_suffix && i<urllen && url[urllen-i]!='.' ; i++) | |
352 | if (url[urllen-i] == '/' || url[urllen-i] == '?') return(false); | |
353 | if (i>max_suffix || i>=urllen) return(false); | |
354 | ||
355 | suffix=url+urllen-i+1; | |
356 | down=0; | |
357 | up=NDownloadSuffix-1; | |
358 | while (down<=up) { | |
359 | center=(down+up)/2; | |
360 | cmp=strcasecmp(suffix,DownloadSuffixIndex[center]); | |
361 | if (cmp == 0) return(true); | |
362 | if (cmp < 0) | |
363 | up = center-1; | |
364 | else | |
365 | down = center+1; | |
366 | } | |
367 | return(false); | |
6e792ade FM |
368 | } |
369 |