]> git.ipfire.org Git - thirdparty/sarg.git/blob - topsites.c
External sort command delimits the columns only on a tabulation
[thirdparty/sarg.git] / topsites.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2011
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 #ifdef ENABLE_DOUBLE_CHECK_DATA
31 extern struct globalstatstruct globstat;
32 #endif
33
34 void topsites(void)
35 {
36 FILE *fp_in, *fp_ou;
37
38 char *buf;
39 char *url;
40 char *ourl=NULL;
41 char csort[255];
42 char general[MAXLEN];
43 char general2[MAXLEN];
44 char general3[MAXLEN];
45 char sites[MAXLEN];
46 char report[MAXLEN];
47 const char *sortf;
48 const char *sortt;
49 long long int nacc;
50 long long int nbytes;
51 long long int ntime;
52 long long int tnacc=0;
53 long long int tnbytes=0;
54 long long int tntime=0;
55 long long int twork1=0, twork2=0, twork3=0;
56 #ifdef ENABLE_DOUBLE_CHECK_DATA
57 long long int ttnacc=0;
58 long long int ttnbytes=0;
59 long long int ttntime=0;
60 #endif
61 int regs=0;
62 int cstatus;
63 int url_len;
64 int ourl_size=0;
65 struct getwordstruct gwarea;
66 longline line;
67 struct generalitemstruct item;
68
69 if(Privacy)
70 return;
71
72 sprintf(general,"%s/sarg-general",outdirname);
73 sprintf(sites,"%s/sarg-sites",outdirname);
74 sprintf(general2,"%s/sarg-general2",outdirname);
75 sprintf(general3,"%s/sarg-general3",outdirname);
76
77 if ((ReportType & REPORT_TYPE_TOPUSERS) == 0)
78 sprintf(report,"%s/index.html",outdirname);
79 else
80 sprintf(report,"%s/topsites.html",outdirname);
81
82 sprintf(csort,"sort -t \"\t\" -k 4,4 -o \"%s\" \"%s\"",general2,general);
83 cstatus=system(csort);
84 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
85 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
86 debuga(_("sort command: %s\n"),csort);
87 exit(EXIT_FAILURE);
88 }
89
90 if((fp_in=fopen(general2,"r"))==NULL) {
91 debuga(_("(topsites) Cannot open log file %s\n"),general2);
92 debuga(_("sort command: %s\n"),csort);
93 exit(EXIT_FAILURE);
94 }
95
96 if((fp_ou=fopen(general3,"w"))==NULL) {
97 debuga(_("(topsites) Cannot open log file %s\n"),general3);
98 exit(EXIT_FAILURE);
99 }
100
101 if ((line=longline_create())==NULL) {
102 debuga(_("Not enough memory to read file %s\n"),general2);
103 exit(EXIT_FAILURE);
104 }
105
106 while((buf=longline_read(fp_in,line))!=NULL) {
107 ger_read(buf,&item,general2);
108 if(item.total) continue;
109
110 if(!regs) {
111 url_len=strlen(item.url);
112 if (!ourl || url_len>=ourl_size) {
113 ourl_size=url_len+1;
114 ourl=realloc(ourl,ourl_size);
115 if (!ourl) {
116 debuga(_("Not enough memory to store the url\n"));
117 exit(EXIT_FAILURE);
118 }
119 }
120 strcpy(ourl,item.url);
121 regs++;
122 }
123
124 if(strcmp(item.url,ourl) != 0) {
125 /*
126 This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable
127 to print a long long int unless it is exactly 64-bits long.
128 */
129 fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,ourl);
130 url_len=strlen(item.url);
131 if (url_len>=ourl_size) {
132 ourl_size=url_len+1;
133 ourl=realloc(ourl,ourl_size);
134 if (!ourl) {
135 debuga(_("Not enough memory to store the url\n"));
136 exit(EXIT_FAILURE);
137 }
138 }
139 strcpy(ourl,item.url);
140 tnacc=0;
141 tnbytes=0;
142 tntime=0;
143 }
144
145 tnacc+=item.nacc;
146 tnbytes+=item.nbytes;
147 tntime+=item.nelap;
148 #ifdef ENABLE_DOUBLE_CHECK_DATA
149 ttnacc+=item.nacc;
150 ttnbytes+=item.nbytes;
151 ttntime+=item.nelap;
152 #endif
153 }
154 fclose(fp_in);
155 longline_destroy(&line);
156
157 if (ourl) {
158 /*
159 This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable
160 to print a long long int unless it is exactly 64-bits long.
161 */
162 fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,ourl);
163 free(ourl);
164 }
165
166 fclose(fp_ou);
167
168 #ifdef ENABLE_DOUBLE_CHECK_DATA
169 if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttntime!=globstat.elap) {
170 debuga(_("Total statistics mismatch when reading %s to produce the top sites\n"),general2);
171 exit(EXIT_FAILURE);
172 }
173 #endif
174
175 if (unlink(general2)) {
176 debuga(_("Cannot delete %s - %s\n"),general2,strerror(errno));
177 exit(EXIT_FAILURE);
178 }
179
180 if((TopsitesSort & TOPSITE_SORT_CONNECT) != 0) {
181 sortf="-k 1,1 -k 2,2";
182 } else if((TopsitesSort & TOPSITE_SORT_BYTES) != 0) {
183 sortf="-k 2,2 -k 1,1";
184 } else if((TopsitesSort & TOPSITE_SORT_TIME) != 0) {
185 sortf="-k 3,3";
186 } else {
187 sortf="-k 2,2 -k 1,1"; //default is BYTES
188 }
189 if((TopsitesSort & TOPSITE_SORT_REVERSE) != 0) {
190 sortt="-r";
191 } else {
192 sortt="";
193 }
194
195 sprintf(csort,"sort -t \"\t\" %s -n %s -o \"%s\" \"%s\"",sortt,sortf,sites,general3);
196 cstatus=system(csort);
197 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
198 debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
199 debuga(_("sort command: %s\n"),csort);
200 exit(EXIT_FAILURE);
201 }
202 if((fp_in=fopen(sites,"r"))==NULL) {
203 debuga(_("(topsites) Cannot open log file %s\n"),sites);
204 debuga(_("sort command: %s\n"),csort);
205 exit(EXIT_FAILURE);
206 }
207
208 if (unlink(general3)) {
209 debuga(_("Cannot delete %s - %s\n"),general3,strerror(errno));
210 exit(EXIT_FAILURE);
211 }
212
213 if((fp_ou=fopen(report,"w"))==NULL) {
214 debuga(_("(topsites) Cannot open log file %s\n"),report);
215 exit(EXIT_FAILURE);
216 }
217
218 write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Top sites"),HTML_JS_SORTTABLE);
219 fputs("<tr><td class=\"header_c\">",fp_ou);
220 fprintf(fp_ou,_("Period: %s"),period.html);
221 fputs("</td></tr>\n",fp_ou);
222 fputs("<tr><th class=\"header_c\">",fp_ou);
223 fprintf(fp_ou,_("Top %d sites"),TopSitesNum);
224 fputs("</th></tr>\n",fp_ou);
225 close_html_header(fp_ou);
226
227 fputs("<div class=\"report\"><table cellpadding=\"1\" cellspacing=\"2\"",fp_ou);
228 if (SortTableJs[0]) fputs(" class=\"sortable\"",fp_ou);
229 fputs(">\n",fp_ou);
230 fprintf(fp_ou,"<thead><tr><th class=\"header_l\">%s</th><th class=\"header_l",_("NUM"));
231 if (SortTableJs[0]) fputs(" sorttable_alpha",fp_ou);
232 fprintf(fp_ou,"\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr></thead>\n",_("ACCESSED SITE"),_("CONNECT"),_("BYTES"),_("TIME"));
233
234 regs=0;
235 ntopsites = 0;
236
237 if ((line=longline_create())==NULL) {
238 debuga(_("Not enough memory to read file %s\n"),sites);
239 exit(EXIT_FAILURE);
240 }
241
242 while(regs<TopSitesNum && (buf=longline_read(fp_in,line))!=NULL) {
243 getword_start(&gwarea,buf);
244 if (getword_atoll(&nacc,&gwarea,'\t')<0) {
245 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),sites);
246 exit(EXIT_FAILURE);
247 }
248 if (nacc == 0) continue;
249 if (getword_atoll(&nbytes,&gwarea,'\t')<0 || getword_atoll(&ntime,&gwarea,'\t')<0) {
250 debuga(_("Maybe you have a broken record or garbage in your %s file\n"),sites);
251 exit(EXIT_FAILURE);
252 }
253 if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
254 debuga(_("The url is invalid in file %s\n"),sites);
255 exit(EXIT_FAILURE);
256 }
257
258 twork1=nacc;
259 twork2=nbytes;
260 twork3=ntime;
261
262 fprintf(fp_ou,"<tr><td class=\"data\">%d</td><td class=\"data2 link\">",++regs);
263
264 if(BlockIt[0] != '\0') {
265 fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
266 output_html_url(fp_ou,url);
267 fputs("\"><img src=\"../images/sarg-squidguard-block.png\"></a>&nbsp;",fp_ou);
268 }
269
270 fputs("<a href=\"http://",fp_ou);
271 output_html_url(fp_ou,url);
272 fputs("\">",fp_ou);
273 output_html_string(fp_ou,url,100);
274 fputs("</a></td><td class=\"data\"",fp_ou);
275 if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(uint64_t)twork1);
276 fprintf(fp_ou,">%s</td>",fixnum(twork1,1));
277 fputs("<td class=\"data\"",fp_ou);
278 if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(uint64_t)twork2);
279 fprintf(fp_ou,">%s</td>",fixnum(twork2,1));
280 fputs("<td class=\"data\"",fp_ou);
281 if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(uint64_t)twork3);
282 fprintf(fp_ou,">%s</td></tr>\n",fixtime(twork3));
283 }
284 fclose(fp_in);
285 longline_destroy(&line);
286
287 fputs("</table></div>\n",fp_ou);
288 if (write_html_trailer(fp_ou)<0)
289 debuga(_("Write error in file %s\n"),report);
290 if (fclose(fp_ou)==EOF)
291 debuga(_("Failed to close file %s - %s\n"),report,strerror(errno));
292
293 return;
294 }