]>
git.ipfire.org Git - thirdparty/sarg.git/blob - download.c
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
31 The buffer to store the list of the suffixes to take into account when generating
32 the report of the downloaded files. The suffixes in the list are separated by the ASCII null character.
35 /*@null@*/static char *DownloadSuffix
=NULL
;
38 The index of all the suffixes stored in ::DownloadSuffix. The list is sorted alphabetically
39 to speed up the search.
41 /*@null@*/static char **DownloadSuffixIndex
=NULL
;
44 The number of suffixes in ::DownloadSuffixIndex.
46 static int NDownloadSuffix
=0;
49 Sort the raw log file with the downloaded files.
51 \param report_in The name of the file in which to store the sorted entries.
53 The file is sorted by columns 3, 1, 2 and 5, which hold the user's ID, the
54 date, the time and the URL respectively.
56 static void download_sort(const char *report_in
)
62 clen
=snprintf(csort
,sizeof(csort
),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 1,1 -k 2,2 -k 5,5 -o \"%s\" \"%s/download.int_unsort\"",
64 if (clen
>=sizeof(csort
)) {
65 debuga(_("Path too long to sort the file: %s/download.int_unsort\n"),tmp
);
68 cstatus
=system(csort
);
69 if (!WIFEXITED(cstatus
) || WEXITSTATUS(cstatus
)) {
70 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus
));
71 debuga(_("sort command: %s\n"),csort
);
74 if (snprintf(csort
,sizeof(csort
),"%s/download.int_unsort",tmp
)>=sizeof(csort
)) {
75 debuga(_("Path too long for %s/download.int_unsort\n"),tmp
);
78 if (!KeepTempLog
&& unlink(csort
)) {
79 debuga(_("Cannot delete \"%s\": %s\n"),csort
,strerror(errno
));
85 Generate the report of the downloaded files. The list of the suffixes to take into account
86 is set with set_download_suffix().
88 void download_report(void)
90 FILE *fp_in
= NULL
, *fp_ou
= NULL
;
94 char report_in
[MAXLEN
];
108 struct getwordstruct gwarea
;
110 struct userinfostruct
*uinfo
;
114 if (debugz
) debugaz(_("No downloaded files to report\n"));
122 snprintf(report_in
,sizeof(report_in
),"%s/download.int_log",tmp
);
123 download_sort(report_in
);
124 if(access(report_in
, R_OK
) != 0) {
125 debugaz(_("Sorted file doesn't exist (to produce the download report)\n"));
129 // produce the report.
130 snprintf(report
,sizeof(report
),"%s/download.html",outdirname
);
132 if((fp_in
=MY_FOPEN(report_in
,"r"))==NULL
) {
133 debuga(_("(download) Cannot open log file %s\n"),report_in
);
137 if((fp_ou
=MY_FOPEN(report
,"w"))==NULL
) {
138 debuga(_("(download) Cannot open log file %s\n"),report
);
142 write_html_header(fp_ou
,(IndexTree
== INDEX_TREE_DATE
) ? 3 : 1,_("Downloads"),HTML_JS_NONE
);
143 fputs("<tr><td class=\"header_c\">",fp_ou
);
144 fprintf(fp_ou
,_("Period: %s"),period
.html
);
145 fputs("</td></tr>\n",fp_ou
);
146 fprintf(fp_ou
,"<tr><th class=\"header_c\">%s</th></tr>\n",_("Downloads"));
147 close_html_header(fp_ou
);
149 fputs("<div class=\"report\"><table cellpadding=\"0\" cellspacing=\"2\">\n",fp_ou
);
150 fprintf(fp_ou
,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("USERID"),_("IP/NAME"),_("DATE/TIME"),_("ACCESSED SITE"));
152 if ((line
=longline_create())==NULL
) {
153 debuga(_("Not enough memory to read the downloaded files\n"));
157 while((buf
=longline_read(fp_in
,line
))!=NULL
) {
158 getword_start(&gwarea
,buf
);
159 if (getword(data
,sizeof(data
),&gwarea
,'\t')<0 || getword(hora
,sizeof(hora
),&gwarea
,'\t')<0 ||
160 getword(user
,sizeof(user
),&gwarea
,'\t')<0 || getword(ip
,sizeof(ip
),&gwarea
,'\t')<0) {
161 debuga(_("There is a broken record or garbage in file %s\n"),report_in
);
164 if (getword_ptr(buf
,&url
,&gwarea
,'\t')<0) {
165 debuga(_("There is a broken url in file %s\n"),report_in
);
168 if (sscanf(data
,"%d/%d/%d",&day
,&month
,&year
)!=3) continue;
169 computedate(year
,month
,day
,&t
);
170 strftime(data
,sizeof(data
),"%x",&t
);
172 uinfo
=userinfo_find_from_id(user
);
174 debuga(_("Unknown user ID %s in file %s\n"),user
,report_in
);
184 if(strcmp(ouser
,user
) != 0) {
188 if(strcmp(oip
,ip
) != 0) {
194 if(DownloadReportLimit
) {
195 if(strcmp(ouser2
,uinfo
->label
) == 0) {
199 strcpy(ouser2
,uinfo
->label
);
201 if(count
>= DownloadReportLimit
)
205 for (i
=strlen(url
)-1 ; i
>=0 && (unsigned char)url
[i
]<' ' ; i
--) url
[i
]=0;
210 fprintf(fp_ou
,"<td class=\"data\"><a href=\"%s/%s.html\">%s</a></td><td class=\"data\">%s</td>",uinfo
->filename
,uinfo
->filename
,uinfo
->label
,ip
);
212 fprintf(fp_ou
,"<td class=\"data\">%s</td><td class=\"data\">%s</td>",uinfo
->label
,ip
);
214 fputs("<td class=\"data\"></td><td class=\"data\"></td>",fp_ou
);
215 fprintf(fp_ou
,"<td class=\"data\">%s-%s</td><td class=\"data2\">",data
,hora
);
216 if(BlockIt
[0]!='\0' && url
[0]!=ALIAS_PREFIX
) {
217 fprintf(fp_ou
,"<a href=\"%s%s?url=\"",wwwDocumentRoot
,BlockIt
);
218 output_html_url(fp_ou
,url
);
219 fprintf(fp_ou
,"\"><img src=\"%s/sarg-squidguard-block.png\"></a> ",ImageFile
);
221 output_html_link(fp_ou
,url
,100);
222 fputs("</td></tr>\n",fp_ou
);
225 longline_destroy(&line
);
227 fputs("</table></div>\n",fp_ou
);
228 if (write_html_trailer(fp_ou
)<0)
229 debuga(_("Write error in file %s\n"),report
);
230 if (fclose(fp_ou
)==EOF
)
231 debuga(_("Failed to close file %s - %s\n"),report
,strerror(errno
));
233 if (!KeepTempLog
&& unlink(report_in
)) {
234 debuga(_("Cannot delete \"%s\": %s\n"),report_in
,strerror(errno
));
242 Free the memory allocated by set_download_suffix().
244 void free_download(void)
246 if (DownloadSuffix
) {
247 free(DownloadSuffix
);
250 if (DownloadSuffixIndex
) {
251 free(DownloadSuffixIndex
);
252 DownloadSuffixIndex
=NULL
;
258 Set the list of the suffixes corresponding to the download of files you want to detect with
259 is_download_suffix(). The list is sorted to make the search faster.
261 \param list A comma-separated list of the suffixes to set in ::DownloadSuffix.
263 \note The memory allocated by this function must be freed by free_download().
265 void set_download_suffix(const char *list
)
273 DownloadSuffix
=strdup(list
);
274 if (!DownloadSuffix
) {
275 debuga(_("Download suffix list too long\n"));
279 for (i
=0 ; list
[i
] ; i
++)
280 if (list
[i
] == ',') j
++;
281 DownloadSuffixIndex
=malloc(j
*sizeof(char *));
282 if (!DownloadSuffixIndex
) {
283 debuga(_("Too many download suffixes\n"));
287 str
= DownloadSuffix
;
288 for (i
=0 ; DownloadSuffix
[i
] ; i
++) {
289 if (DownloadSuffix
[i
] == ',') {
290 DownloadSuffix
[i
] = '\0';
293 for (j
=0 ; j
<NDownloadSuffix
&& (cmp
=strcasecmp(str
,DownloadSuffixIndex
[j
]))>0 ; j
++);
295 for (k
=NDownloadSuffix
; k
>j
; k
--)
296 DownloadSuffixIndex
[k
]=DownloadSuffixIndex
[k
-1];
298 DownloadSuffixIndex
[j
]=str
;
301 str
=DownloadSuffix
+i
+1;
307 for (j
=0 ; j
<NDownloadSuffix
&& (cmp
=strcasecmp(str
,DownloadSuffixIndex
[j
]))>0 ; j
++);
309 for (k
=NDownloadSuffix
; k
>j
; k
--)
310 DownloadSuffixIndex
[k
]=DownloadSuffixIndex
[k
-1];
312 DownloadSuffixIndex
[j
]=str
;
318 Tell if the URL corresponds to a downloaded file. The function takes the extension at the end of the
319 URL with a maximum of 10 characters and compares it to the list of the download suffixes in
320 ::DownloadSuffix. If the suffix is found in the list, the function reports the URL as the download of a file.
323 \param url The URL to test.
325 \retval 1 The URL matches a suffix of a download.
326 \retval 0 The URL is not a known download.
328 \note A downloaded file cannot be detected if the file name is embedded in a GET or POST request. Only requests
329 that end with the file name can be detected.
331 \note A URL that embeds another web site's address ending with .com at the very end of the URL will match the download
332 extension com if that extension is defined in ::DownloadSuffix.
334 bool is_download_suffix(const char *url
)
338 int down
, up
, center
;
341 const int max_suffix
=10;
343 if (DownloadSuffix
== NULL
|| NDownloadSuffix
== 0) return(false);
345 urllen
=strlen(url
)-1;
346 if (urllen
<=0) return(false);
347 if (url
[urllen
] == '.') return(false); //reject a single trailing dot
348 for (i
=0 ; i
<urllen
&& (url
[i
]!='/' || url
[i
+1]=='/') && url
[i
]!='?' ; i
++);
349 if (i
>=urllen
) return(false); // url is a hostname without any path or file to download
351 for (i
=0 ; i
<=max_suffix
&& i
<urllen
&& url
[urllen
-i
]!='.' ; i
++)
352 if (url
[urllen
-i
] == '/' || url
[urllen
-i
] == '?') return(false);
353 if (i
>max_suffix
|| i
>=urllen
) return(false);
355 suffix
=url
+urllen
-i
+1;
357 up
=NDownloadSuffix
-1;
360 cmp
=strcasecmp(suffix
,DownloadSuffixIndex
[center
]);
361 if (cmp
== 0) return(true);