]> git.ipfire.org Git - thirdparty/sarg.git/blobdiff - topsites.c
Add support to decompress xz files
[thirdparty/sarg.git] / topsites.c
index d53ac6250a7f4ecded06dd09653373aa5f0e1f58..5992cef520e38c3b672b299095ffddff01137106 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
- *                                                            1998, 2010
+ *                                                            1998, 2015
  *
  * SARG donations:
  *      please look at http://sarg.sourceforge.net/donations.php
 #include "include/conf.h"
 #include "include/defs.h"
 
+#ifdef ENABLE_DOUBLE_CHECK_DATA
+extern struct globalstatstruct globstat;
+#endif
+
 void topsites(void)
 {
-
-   FILE *fp_in, *fp_ou;
-
-   char *buf;
-   char *url;
-   char *ourl=NULL;
-   char csort[255];
-   char general[MAXLEN];
-   char general2[MAXLEN];
-   char general3[MAXLEN];
-   char per[MAXLEN];
-   char sites[MAXLEN];
-   char report[MAXLEN];
-   char period[100];
-   const char *sortf;
-   const char *sortt;
-   long long int nacc;
-   long long int nbytes;
-   long long int ntime;
-   long long int tnacc=0;
-   long long int tnbytes=0;
-   long long int tntime=0;
-   long long int twork1=0, twork2=0, twork3=0;
-   int regs=0;
-   int cstatus;
-   int url_len;
-   int ourl_size=0;
-   struct getwordstruct gwarea;
-   struct longlinestruct line;
-   struct generalitemstruct item;
-
-   if(Privacy)
-      return;
-
-   sprintf(general,"%s/sarg-general",outdirname);
-   sprintf(sites,"%s/sarg-sites",outdirname);
-   sprintf(general2,"%s/sarg-general2",outdirname);
-   sprintf(general3,"%s/sarg-general3",outdirname);
-   sprintf(per,"%s/sarg-period",outdirname);
-
-   if ((ReportType & REPORT_TYPE_TOPUSERS) == 0)
-      sprintf(report,"%s/index.html",outdirname);
-   else
-      sprintf(report,"%s/topsites.html",outdirname);
-
-   if ((fp_in = fopen(per, "r")) == 0) {
-      debuga(_("(topsites) Cannot open file: %s\n"),per);
-      exit(1);
-   }
-
-   if (!fgets(period,sizeof(period),fp_in)) {
-      debuga(_("(topsites) read error in %s\n"),per);
-      exit(1);
-   }
-   fclose(fp_in);
-
-   sprintf(csort,"sort -k 4,4 -o \"%s\" \"%s\"",general2,general);
-   cstatus=system(csort);
-   if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
-      debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
-      debuga(_("sort command: %s\n"),csort);
-      exit(1);
-   }
-
-   if((fp_in=fopen(general2,"r"))==NULL) {
-     debuga(_("(topsites) Cannot open log file %s\n"),general2);
-     debuga(_("sort command: %s\n"),csort);
-     exit(1);
-   }
-
-   if((fp_ou=fopen(general3,"w"))==NULL) {
-     debuga(_("(topsites) Cannot open log file %s\n"),general3);
-     exit(1);
-   }
-
-   if (longline_prepare(&line)<0) {
-      debuga(_("Not enough memory to read file %s\n"),general2);
-      exit(1);
-   }
-
-   while((buf=longline_read(fp_in,&line))!=NULL) {
-      ger_read(buf,&item,general2);
-      if(item.total) continue;
-
-      if(!regs) {
-         url_len=strlen(item.url);
-         if (!ourl || url_len>=ourl_size) {
-            ourl_size=url_len+1;
-            ourl=realloc(ourl,ourl_size);
-            if (!ourl) {
-               debuga(_("Not enough memory to store the url\n"));
-               exit(1);
-            }
-         }
-         strcpy(ourl,item.url);
-         regs++;
-      }
-
-      if(strcmp(item.url,ourl) != 0) {
-         fprintf(fp_ou,"%lld\t%lld\t%lld\t%s\n",tnacc,tnbytes,tntime,ourl);
-         url_len=strlen(item.url);
-         if (url_len>=ourl_size) {
-            ourl_size=url_len+1;
-            ourl=realloc(ourl,ourl_size);
-            if (!ourl) {
-               debuga(_("Not enough memory to store the url\n"));
-               exit(1);
-            }
-         }
-         strcpy(ourl,item.url);
-         tnacc=0;
-         tnbytes=0;
-         tntime=0;
-      }
-
-      tnacc+=item.nacc;
-      tnbytes+=item.nbytes;
-      tntime+=item.nelap;
-   }
-   fclose(fp_in);
-   unlink(general2);
-   longline_free(&line);
-
-   if (ourl) {
-      fprintf(fp_ou,"%lld\t%lld\t%lld\t%s\n",tnacc,tnbytes,tntime,ourl);
-      free(ourl);
-   }
-
-   fclose(fp_ou);
-
-   strlow(TopsitesSortField);
-   strlow(TopsitesSortType);
-
-   if(strcmp(TopsitesSortField,"connect") == 0)
-      sortf="-k 1,1 -k 2,2";
-   else if(strcmp(TopsitesSortField,"bytes") == 0)
-      sortf="-k 2,2 -k 1,1";
-   else
-      sortf="";
-   if(strcmp(TopsitesSortType,"a") == 0)
-      sortt="";
-   else if(strcmp(TopsitesSortType,"d") == 0)
-      sortt="-r";
-   else
-      sortt="";
-
-   sprintf(csort,"sort %s -n %s -o \"%s\" \"%s\"",sortt,sortf,sites,general3);
-   cstatus=system(csort);
-   if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
-      debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus));
-      debuga(_("sort command: %s\n"),csort);
-      exit(1);
-   }
-   if((fp_in=fopen(sites,"r"))==NULL) {
-      debuga(_("(topsites) Cannot open log file %s\n"),sites);
-      debuga(_("sort command: %s\n"),csort);
-      exit(1);
-   }
-
-   unlink(general2);
-   unlink(general3);
-
-   if((fp_ou=fopen(report,"w"))==NULL) {
-     debuga(_("(topsites) Cannot open log file %s\n"),report);
-     exit(1);
-   }
-
-   write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Top sites"));
-   fputs("<tr><td class=\"header_c\">",fp_ou);
-   fprintf(fp_ou,_("Period: %s"),period);
-   fputs("</td></tr>\n",fp_ou);
-   fputs("<tr><th class=\"header_c\">",fp_ou);
-   fprintf(fp_ou,_("Top %d sites"),TopSitesNum);
-   fputs("</th></tr>\n",fp_ou);
-   close_html_header(fp_ou);
-
-   fputs("<div class=\"report\"><table cellpadding=\"1\" cellspacing=\"2\">\n",fp_ou);
-   fputs("<tr><td></td></tr>\n",fp_ou);
-   fprintf(fp_ou,"<tr><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr>\n",_("NUM"),_("ACCESSED SITE"),_("CONNECT"),_("BYTES"),_("TIME"));
-
-   regs=0;
-   ntopsites = 0;
-
-   if (longline_prepare(&line)<0) {
-      debuga(_("Not enough memory to read file %s\n"),sites);
-      exit(1);
-   }
-
-   while(regs<TopSitesNum && (buf=longline_read(fp_in,&line))!=NULL) {
-      getword_start(&gwarea,buf);
-      if (getword_atoll(&nacc,&gwarea,'\t')<0) {
-         debuga(_("Maybe you have a broken record or garbage in your %s file\n"),sites);
-         exit(1);
-      }
-      if (nacc == 0) continue;
-      if (getword_atoll(&nbytes,&gwarea,'\t')<0 || getword_atoll(&ntime,&gwarea,'\t')<0) {
-         debuga(_("Maybe you have a broken record or garbage in your %s file\n"),sites);
-         exit(1);
-      }
-      if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
-         debuga(_("The url is invalid in file %s\n"),sites);
-         exit(1);
-      }
-
-      twork1=nacc;
-      twork2=nbytes;
-      twork3=ntime;
-
-      strcpy(wwork1,fixnum(twork1,1));
-      strcpy(wwork2,fixnum(twork2,1));
-      strcpy(wwork3,fixtime(twork3));
-
-      fprintf(fp_ou,"<tr><td class=\"data\">%d</td><td class=\"data2 link\">",++regs);
-
-      if(BlockIt[0] != '\0') {
-         fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
-         output_html_url(fp_ou,url);
-         fputs("\"><img src=\"../images/sarg-squidguard-block.png\"></a>&nbsp;",fp_ou);
-      }
-
-      fputs("<a href=\"http://",fp_ou);
-      output_html_url(fp_ou,url);
-      fputs("\">",fp_ou);
-      output_html_string(fp_ou,url,100);
-      fprintf(fp_ou,"</a></td><td class=\"data\">%s</td><td class=\"data\">%s</td><td class=\"data\">%s</td></tr>\n",wwork1,wwork2,wwork3);
-   }
-   fclose(fp_in);
-   longline_free(&line);
-
-   fputs("</table></div>\n",fp_ou);
-   write_html_trailer(fp_ou);
-   fclose(fp_ou);
-
-   return;
-
+       /*
+       Produce the "top sites" HTML report (topsites.html) in outdirname.
+
+       Steps:
+       1. Sort sarg-general into sarg-general2 with an external sort command.
+       2. Read sarg-general2 and, for each run of identical urls, write one line
+          to sarg-general3 with the summed accesses, bytes, elapsed time and the
+          number of user changes seen for that url.
+       3. Sort sarg-general3 into sarg-sites according to TopsitesSort.
+       4. Write at most TopSitesNum rows of sarg-sites into the HTML report.
+
+       Nothing is produced when Privacy is set. Every failure is reported with
+       debuga() and ends the program with exit(EXIT_FAILURE).
+       */
+       FileObject *fp_in;
+       FILE *fp_ou;
+
+       char *buf;
+       char *url;
+       char *ourl=NULL;
+       char csort[4096];
+       char general[MAXLEN];
+       char general2[MAXLEN];
+       char general3[MAXLEN];
+       char sites[MAXLEN];
+       char report[MAXLEN];
+       char ouser[MAX_USER_LEN]="";
+       const char *sortf;
+       const char *sortt;
+       long long int nacc;
+       long long int nbytes;
+       long long int ntime;
+       long long int tnacc=0;
+       long long int tnbytes=0;
+       long long int tntime=0;
+       long long int twork1=0, twork2=0, twork3=0;
+#ifdef ENABLE_DOUBLE_CHECK_DATA
+       long long int ttnacc=0;
+       long long int ttnbytes=0;
+       long long int ttntime=0;
+#endif
+       int nusers=0;
+       int regs=0;
+       int cstatus;
+       int url_len;
+       int ourl_size=0;
+       struct getwordstruct gwarea;
+       longline line;
+       struct generalitemstruct item;
+
+       if(Privacy) {
+               if (debugz>=LogLevel_Process) debugaz(__FILE__,__LINE__,_("Top sites report not produced because privacy option is on\n"));
+               return;
+       }
+       if (debugz>=LogLevel_Process)
+               debuga(__FILE__,__LINE__,_("Creating top sites report...\n"));
+
+       sprintf(general,"%s/sarg-general",outdirname);
+       sprintf(sites,"%s/sarg-sites",outdirname);
+       sprintf(general2,"%s/sarg-general2",outdirname);
+       sprintf(general3,"%s/sarg-general3",outdirname);
+
+       sprintf(report,"%s/topsites.html",outdirname);
+
+       /*
+       Step 1: sort the general file on tab separated fields 4 then 1.
+       NOTE(review): presumably field 4 is the url and field 1 the user, so that the
+       loop below sees identical urls, then identical users, on adjacent lines. Confirm
+       against the writer of sarg-general.
+       */
+       if (snprintf(csort,sizeof(csort),"sort -t \"\t\" -k 4,4 -k 1,1 -o \"%s\" \"%s\"",general2,general)>=sizeof(csort)) {
+               debuga(__FILE__,__LINE__,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),general,general2);
+               exit(EXIT_FAILURE);
+       }
+       cstatus=system(csort);
+       if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
+               debuga(__FILE__,__LINE__,_("sort command return status %d\n"),WEXITSTATUS(cstatus));
+               debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort);
+               exit(EXIT_FAILURE);
+       }
+
+       if((fp_in=FileObject_Open(general2))==NULL) {
+               debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),general2,FileObject_GetLastOpenError());
+               debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort);
+               exit(EXIT_FAILURE);
+       }
+
+       if((fp_ou=fopen(general3,"w"))==NULL) {
+               debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),general3,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       if ((line=longline_create())==NULL) {
+               debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),general2);
+               exit(EXIT_FAILURE);
+       }
+
+       /* Step 2: aggregate the sorted records per url. Records flagged as totals are skipped. */
+       while((buf=longline_read(fp_in,line))!=NULL) {
+               ger_read(buf,&item,general2);
+               if(item.total) continue;
+
+               /* First data record: remember its url as the current group. */
+               if(!regs) {
+                       url_len=strlen(item.url);
+                       if (!ourl || url_len>=ourl_size) {
+                               ourl_size=url_len+1;
+                               ourl=realloc(ourl,ourl_size);
+                               if (!ourl) {
+                                       debuga(__FILE__,__LINE__,_("Not enough memory to store the url\n"));
+                                       exit(EXIT_FAILURE);
+                               }
+                       }
+                       strcpy(ourl,item.url);
+                       regs++;
+               }
+
+               if(strcmp(item.url,ourl) != 0) {
+                       /*
+                       The url changed: flush the totals of the previous url and start a new group.
+
+                       This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable
+                       to print a long long int unless it is exactly 64-bits long.
+                       */
+                       fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%d\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,nusers,ourl);
+                       url_len=strlen(item.url);
+                       if (url_len>=ourl_size) {
+                               ourl_size=url_len+1;
+                               ourl=realloc(ourl,ourl_size);
+                               if (!ourl) {
+                                       debuga(__FILE__,__LINE__,_("Not enough memory to store the url\n"));
+                                       exit(EXIT_FAILURE);
+                               }
+                       }
+                       strcpy(ourl,item.url);
+                       /* NOTE(review): unbounded copy; assumes item.user is shorter than MAX_USER_LEN. Confirm against ger_read(). */
+                       strcpy(ouser,item.user);
+                       tnacc=0;
+                       tnbytes=0;
+                       tntime=0;
+                       nusers=1;
+               } else if (strcmp(item.user,ouser)!=0) {
+                       /* Same url but another user than on the previous record: one more user for this url. */
+                       strcpy(ouser,item.user);
+                       nusers++;
+               }
+
+               tnacc+=item.nacc;
+               tnbytes+=item.nbytes;
+               tntime+=item.nelap;
+#ifdef ENABLE_DOUBLE_CHECK_DATA
+               ttnacc+=item.nacc;
+               ttnbytes+=item.nbytes;
+               ttntime+=item.nelap;
+#endif
+       }
+       if (FileObject_Close(fp_in)) {
+               debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),general2,FileObject_GetLastCloseError());
+               exit(EXIT_FAILURE);
+       }
+       longline_destroy(&line);
+
+       /* Flush the last group. ourl is still NULL when no data record was read. */
+       if (ourl) {
+               /*
+               This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable
+               to print a long long int unless it is exactly 64-bits long.
+               */
+               fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%d\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,nusers,ourl);
+               free(ourl);
+       }
+
+       if (fclose(fp_ou)==EOF) {
+               debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),general3,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+#ifdef ENABLE_DOUBLE_CHECK_DATA
+       /* The grand totals accumulated above must match the global statistics. */
+       if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttntime!=globstat.elap) {
+               debuga(__FILE__,__LINE__,_("Total statistics mismatch when reading \"%s\" to produce the top sites\n"),general2);
+               exit(EXIT_FAILURE);
+       }
+#endif
+
+       if (!KeepTempLog && unlink(general2)) {
+               debuga(__FILE__,__LINE__,_("Cannot delete \"%s\": %s\n"),general2,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       /*
+       Step 3: choose the sort keys among the columns written above
+       (1=accesses, 2=bytes, 3=elapsed time, 4=number of users).
+       */
+       if((TopsitesSort & TOPSITE_SORT_CONNECT) != 0) {
+               sortf="-k 1,1 -k 2,2";
+       } else if((TopsitesSort & TOPSITE_SORT_BYTES) != 0) {
+               sortf="-k 2,2 -k 1,1";
+       } else if((TopsitesSort & TOPSITE_SORT_TIME) != 0) {
+               sortf="-k 3,3";
+       } else if((TopsitesSort & TOPSITE_SORT_USER) != 0) {
+               sortf="-k 4,4 -k 1,1 -k 2,2";
+       } else {
+               sortf="-k 2,2 -k 1,1"; //default is BYTES
+       }
+       if((TopsitesSort & TOPSITE_SORT_REVERSE) != 0) {
+               sortt="-r";
+       } else {
+               sortt="";
+       }
+
+       if (snprintf(csort,sizeof(csort),"sort -t \"\t\" %s -n %s -o \"%s\" \"%s\"",sortt,sortf,sites,general3)>=sizeof(csort)) {
+               debuga(__FILE__,__LINE__,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),general3,sites);
+               exit(EXIT_FAILURE);
+       }
+       cstatus=system(csort);
+       if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
+               debuga(__FILE__,__LINE__,_("sort command return status %d\n"),WEXITSTATUS(cstatus));
+               debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort);
+               exit(EXIT_FAILURE);
+       }
+       if((fp_in=FileObject_Open(sites))==NULL) {
+               debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),sites,FileObject_GetLastOpenError());
+               debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort);
+               exit(EXIT_FAILURE);
+       }
+
+       if (!KeepTempLog && unlink(general3)) {
+               debuga(__FILE__,__LINE__,_("Cannot delete \"%s\": %s\n"),general3,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       /* Step 4: write the HTML report. */
+       if((fp_ou=fopen(report,"w"))==NULL) {
+               debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),report,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Top sites"),HTML_JS_SORTTABLE);
+       fputs("<tr><td class=\"header_c\">",fp_ou);
+       fprintf(fp_ou,_("Period: %s"),period.html);
+       fputs("</td></tr>\n",fp_ou);
+       fputs("<tr><th class=\"header_c\">",fp_ou);
+       fprintf(fp_ou,_("Top %d sites"),TopSitesNum);
+       fputs("</th></tr>\n",fp_ou);
+       close_html_header(fp_ou);
+
+       fputs("<div class=\"report\"><table cellpadding=\"1\" cellspacing=\"2\"",fp_ou);
+       if (SortTableJs[0]) fputs(" class=\"sortable\"",fp_ou);
+       fputs(">\n",fp_ou);
+       fprintf(fp_ou,"<thead><tr><th class=\"header_l\">%s</th><th class=\"header_l",
+       /* TRANSLATORS: This is a column header showing the position of the entry in the sorted list. */
+       _("NUM"));
+       if (SortTableJs[0]) fputs(" sorttable_alpha",fp_ou);
+       fprintf(fp_ou,"\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th><th class=\"header_l\">%s</th></tr></thead>\n",
+       /* TRANSLATORS: This is a column header showing the URL of the visited sites. */
+       _("ACCESSED SITE"),
+       /* TRANSLATORS: This is a column header showing the number of connections to a visited site. */
+       _("CONNECT"),
+       /* TRANSLATORS: This is a column header showing the number of transferred bytes. */
+       _("BYTES"),
+       /* TRANSLATORS: This is a column header showing the time spent by the proxy processing the requests. */
+       pgettext("duration","TIME"),
+       /* TRANSLATORS: This is a column header showing the number of users who visited a site. */
+       _("USERS"));
+
+       regs=0;
+       ntopsites = 0;
+
+       if ((line=longline_create())==NULL) {
+               debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),sites);
+               exit(EXIT_FAILURE);
+       }
+
+       /* One table row per record, in file order, until TopSitesNum rows are written. */
+       while(regs<TopSitesNum && (buf=longline_read(fp_in,line))!=NULL) {
+               getword_start(&gwarea,buf);
+               if (getword_atoll(&nacc,&gwarea,'\t')<0) {
+                       debuga(__FILE__,__LINE__,_("Invalid record in file \"%s\"\n"),sites);
+                       exit(EXIT_FAILURE);
+               }
+               /* Sites without any access are not listed and do not count toward TopSitesNum. */
+               if (nacc == 0) continue;
+               if (getword_atoll(&nbytes,&gwarea,'\t')<0 || getword_atoll(&ntime,&gwarea,'\t')<0) {
+                       debuga(__FILE__,__LINE__,_("Invalid record in file \"%s\"\n"),sites);
+                       exit(EXIT_FAILURE);
+               }
+               if (getword_atoi(&nusers,&gwarea,'\t')<0) {
+                       debuga(__FILE__,__LINE__,_("Invalid number of users in file \"%s\"\n"),sites);
+                       exit(EXIT_FAILURE);
+               }
+               if (getword_ptr(buf,&url,&gwarea,'\t')<0) {
+                       debuga(__FILE__,__LINE__,_("Invalid url in file \"%s\"\n"),sites);
+                       exit(EXIT_FAILURE);
+               }
+
+               twork1=nacc;
+               twork2=nbytes;
+               twork3=ntime;
+
+               fprintf(fp_ou,"<tr><td class=\"data\">%d</td><td class=\"data2\">",++regs);
+
+               /* The block link is only emitted when BlockIt is set and the url does not start with ALIAS_PREFIX. */
+               if(BlockIt[0] != '\0' && url[0]!=ALIAS_PREFIX) {
+                       fprintf(fp_ou,"<a href=\"%s%s?url=\"",wwwDocumentRoot,BlockIt);
+                       output_html_url(fp_ou,url);
+                       fputs("\"><img src=\"../images/sarg-squidguard-block.png\"></a>&nbsp;",fp_ou);
+               }
+
+               /*
+               The raw values are written as sorttable_customkey attributes when SortTableJs is set.
+               The counters are signed, so they are cast to int64_t to match the PRId64 conversion.
+               */
+               output_html_link(fp_ou,url,100);
+               fputs("</td><td class=\"data\"",fp_ou);
+               if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(int64_t)twork1);
+               fprintf(fp_ou,">%s</td>",fixnum(twork1,1));
+               fputs("<td class=\"data\"",fp_ou);
+               if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(int64_t)twork2);
+               fprintf(fp_ou,">%s</td>",fixnum(twork2,1));
+               fputs("<td class=\"data\"",fp_ou);
+               if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%"PRId64"\"",(int64_t)twork3);
+               fprintf(fp_ou,">%s</td>",fixtime(twork3));
+               fputs("<td class=\"data\"",fp_ou);
+               if (SortTableJs[0]) fprintf(fp_ou," sorttable_customkey=\"%d\"",nusers);
+               fprintf(fp_ou,">%s</td></tr>\n",fixnum(nusers,1));
+       }
+       if (FileObject_Close(fp_in)) {
+               debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),sites,FileObject_GetLastCloseError());
+               exit(EXIT_FAILURE);
+       }
+       longline_destroy(&line);
+
+       fputs("</table></div>\n",fp_ou);
+       write_html_trailer(fp_ou);
+       if (fclose(fp_ou)==EOF) {
+               debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),report,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
+       return;
 }