/*
 * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
 *                                           http://sarg.sourceforge.net/donations.php
 *                                           http://sourceforge.net/projects/sarg/forums/forum/363374
 * ---------------------------------------------------------------------
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 */
27 #include "include/conf.h"
28 #include "include/defs.h"
/*!
The buffer to store the list of the suffixes to take into account when generating
the report of the downloaded files. The suffixes in the list are separated by the ASCII
null character so that each suffix is an independent NUL-terminated string.
*/
/*@null@*/static char *DownloadSuffix=NULL;

/*!
The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically
to speed up the search.
*/
/*@null@*/static char **DownloadSuffixIndex=NULL;

/*!
The number of suffixes in ::DownloadSuffixIndex.
*/
static int NDownloadSuffix=0;
49 Sort the raw log file with the downloaded files.
51 \param report_in The name of the file where to store the sorted entries.
53 The file is sorted by columns 3, 1, 2 and 5 that are the columns of the user's ID, the
54 date, the time and the URL.
56 static void download_sort(const char *report_in
)
62 clen
=snprintf(csort
,sizeof(csort
),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"",
64 if (clen
>=sizeof(csort
)) {
65 debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp
);
68 cstatus
=system(csort
);
69 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
70 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
71 debuga(_("sort command: %s\n"),csort
);
74 if (snprintf(csort
,sizeof(csort
),"%s/download.int_unsort",tmp
)>=sizeof(csort
)) {
75 debuga(_("Path too long for %s/download.int_unsort\n"),tmp
);
79 debuga(_("Cannot delete %s - %s\n"),csort
,strerror(errno
));
85 Generate the report of the downloaded files. The list of the suffixes to take into account
86 is set with set_download_suffix().
88 void download_report(void)
90 FILE *fp_in
= NULL
, *fp_ou
= NULL
;
94 char report_in
[MAXLEN
];
108 struct getwordstruct gwarea
;
110 struct userinfostruct
*uinfo
;
117 snprintf(report_in
,sizeof(report_in
),"%s/download.int_log",tmp
);
118 download_sort(report_in
);
119 if(access(report_in
, R_OK
) != 0) {
120 if (debugz
) debugaz(_("Downloaded files report not generated as it is empty\n"));
124 // produce the report.
125 snprintf(report
,sizeof(report
),"%s/download.html",outdirname
);
127 if((fp_in
=MY_FOPEN(report_in
,"r"))==NULL
) {
128 debuga(_("(download) Cannot open log file %s\n"),report_in
);
132 if((fp_ou
=MY_FOPEN(report
,"w"))==NULL
) {
133 debuga(_("(download) Cannot open log file %s\n"),report
);
137 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Downloads"),HTML_JS_NONE
);
138 fputs("<tr><td class=\"header_c\">",fp_ou
);
139 fprintf(fp_ou
,_("Period: %s"),period
.html
);
140 fputs("</td></tr>\n",fp_ou
);
141 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
142 close_html_header(fp_ou
);
144 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou
);
145 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
147 if ((line
=longline_create())==NULL
) {
148 debuga(_("Not enough memory to read the downloaded files\n"));
152 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
153 getword_start(&gwarea
,buf
);
154 if (getword(data
,sizeof(data
),&gwarea
,'\t')<0 || getword(hora
,sizeof(hora
),&gwarea
,'\t')<0 ||
155 getword(user
,sizeof(user
),&gwarea
,'\t')<0 || getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
156 debuga(_("There is a broken record or garbage in file %s\n"),report_in
);
159 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
160 debuga(_("There is a broken url in file %s\n"),report_in
);
163 if (sscanf(data
,"%d/%d/%d",&day
,&month
,&year
)!=3) continue;
164 computedate(year
,month
,day
,&t
);
165 strftime(data
,sizeof(data
),"%x",&t
);
167 uinfo
=userinfo_find_from_id(user
);
169 debuga(_("Unknown user ID %s in file %s\n"),user
,report_in
);
179 if(strcmp(ouser
,user
) != 0) {
183 if(strcmp(oip
,ip
) != 0) {
189 if(DownloadReportLimit
) {
190 if(strcmp(ouser2
,uinfo
->label
) == 0) {
194 strcpy(ouser2
,uinfo
->label
);
196 if(count
>= DownloadReportLimit
)
200 for (i
=strlen(url
)-1 ; i
>=0 && (unsigned char)url
[i
]<' ' ; i
--) url
[i
]=0;
205 fprintf(fp_ou
,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo
->filename
,uinfo
->filename
,uinfo
->label
,ip
);
207 fprintf(fp_ou
,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo
->label
,ip
);
209 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou
);
210 fprintf(fp_ou
,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data
,hora
);
211 if(BlockIt
[0]!='\0' && url
[0]!=ALIAS_PREFIX
) {
212 fprintf(fp_ou
,"<a href=\"%s%s?url=\"",wwwDocumentRoot
,BlockIt
);
213 output_html_url(fp_ou
,url
);
214 fprintf(fp_ou
,"\"><img src=\"%s/sarg-squidguard-block.png\"></a> ",ImageFile
);
216 output_html_link(fp_ou
,url
,100);
217 fputs("</td></tr>\n",fp_ou
);
220 longline_destroy(&line
);
222 fputs("</table></div>\n",fp_ou
);
223 if (write_html_trailer(fp_ou
)<0)
224 debuga(_("Write error in file %s\n"),report
);
225 if (fclose(fp_ou
)==EOF
)
226 debuga(_("Failed to close file %s - %s\n"),report
,strerror(errno
));
228 if (unlink(report_in
)) {
229 debuga(_("Cannot delete %s - %s\n"),report_in
,strerror(errno
));
237 Free the memory allocated by set_download_suffix().
239 void free_download(void)
241 if (DownloadSuffix
) {
242 free(DownloadSuffix
);
245 if (DownloadSuffixIndex
) {
246 free(DownloadSuffixIndex
);
247 DownloadSuffixIndex
=NULL
;
253 Set the list of the suffixes corresponding to the download of files you want to detect with
254 is_download_suffix(). The list is sorted to make the search faster.
256 \param list A comma separated list of the suffixes to set in ::DownloadSuffix.
258 \note The memory allocated by this function must be freed by free_download().
260 void set_download_suffix(const char *list
)
268 DownloadSuffix
=strdup(list
);
269 if (!DownloadSuffix
) {
270 debuga(_("Download suffix list too long\n"));
274 for (i
=0 ; list
[i
] ; i
++)
275 if (list
[i
] == ',') j
++;
276 DownloadSuffixIndex
=malloc(j
*sizeof(char *));
277 if (!DownloadSuffixIndex
) {
278 debuga(_("Too many download suffixes\n"));
282 str
= DownloadSuffix
;
283 for (i
=0 ; DownloadSuffix
[i
] ; i
++) {
284 if (DownloadSuffix
[i
] == ',') {
285 DownloadSuffix
[i
] = '\0';
288 for (j
=0 ; j
<NDownloadSuffix
&& (cmp
=strcasecmp(str
,DownloadSuffixIndex
[j
]))>0 ; j
++);
290 for (k
=NDownloadSuffix
; k
>j
; k
--)
291 DownloadSuffixIndex
[k
]=DownloadSuffixIndex
[k
-1];
293 DownloadSuffixIndex
[j
]=str
;
296 str
=DownloadSuffix
+i
+1;
302 for (j
=0 ; j
<NDownloadSuffix
&& (cmp
=strcasecmp(str
,DownloadSuffixIndex
[j
]))>0 ; j
++);
304 for (k
=NDownloadSuffix
; k
>j
; k
--)
305 DownloadSuffixIndex
[k
]=DownloadSuffixIndex
[k
-1];
307 DownloadSuffixIndex
[j
]=str
;
/*!
Tell if the URL correspond to a downloaded file. The function takes the extension at the end of the
URL with a maximum of 9 characters and compare it to the list of the download suffix in
::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download
of a file.

\param url The URL to test.

\retval 1 The URL matches a suffix of a download.
\retval 0 The URL is not a known download.

\note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
that ends with the file name can be detected.

\note A URL embedding another web site's address ending by .com at the end of the URL will match the download
extension com if it is defined in the ::DownloadSuffix.
*/
329 bool is_download_suffix(const char *url
)
333 int down
, up
, center
;
336 const int max_suffix
=10;
338 if (DownloadSuffix
== NULL
|| NDownloadSuffix
== 0) return(false);
340 urllen
=strlen(url
)-1;
341 if (urllen
<=0) return(false);
342 if (url
[urllen
] == '.') return(false); //reject a single trailing dot
343 for (i
=0 ; i
<urllen
&& (url
[i
]!='/' || url
[i
+1]=='/') && url
[i
]!='?' ; i
++);
344 if (i
>=urllen
) return(false); // url is a hostname without any path or file to download
346 for (i
=0 ; i
<=max_suffix
&& i
<urllen
&& url
[urllen
-i
]!='.' ; i
++)
347 if (url
[urllen
-i
] == '/' || url
[urllen
-i
] == '?') return(false);
348 if (i
>max_suffix
|| i
>=urllen
) return(false);
350 suffix
=url
+urllen
-i
+1;
352 up
=NDownloadSuffix
-1;
355 cmp
=strcasecmp(suffix
,DownloadSuffixIndex
[center
]);
356 if (cmp
== 0) return(true);