From: Frédéric Marchal Date: Mon, 27 Dec 2010 15:00:51 +0000 (+0000) Subject: Keep global statistics in memory and use them to check computation X-Git-Tag: v2.3.2~103 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9dc209887bd379d7edb0cef98ac38511c108a59a;p=thirdparty%2Fsarg.git Keep global statistics in memory and use them to check computation The total statistics of the log files are kept in memory. They are reused when necessary instead of parsing sarg-general again. They are also used to validate the processing of sarg-general during the creation of the e-mail, the top sites and the top users. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index dd5a1b3..aea8d37 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(sarg C) SET(sarg_VERSION 2) SET(sarg_REVISION "3.2-pre1") SET(sarg_BUILD "") -SET(sarg_BUILDDATE "Dec-26-2010") +SET(sarg_BUILDDATE "Dec-27-2010") INCLUDE(AddFileDependencies) INCLUDE(CheckIncludeFile) @@ -47,7 +47,7 @@ SET(SARGPHPDIR "share/sarg/sarg-php" CACHE PATH "The directory to copy sarg-php INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}") #Make it find this config.h before the one in src/include INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/include") SET(SRC util.c log.c report.c topuser.c email.c sort.c html.c - totger.c index.c getconf.c usage.c decomp.c ip2name.c + index.c getconf.c usage.c decomp.c ip2name.c useragent.c exclude.c convlog.c totday.c repday.c datafile.c indexonly.c splitlog.c lastlog.c topsites.c siteuser.c css.c smartfilter.c denied.c authfail.c charset.c diff --git a/Makefile.in b/Makefile.in index de9da8a..ff5370e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -32,7 +32,7 @@ INSTALL = cp INSTALL_PROGRAM = $(INSTALL) SRCS = util.c log.c report.c topuser.c email.c sort.c html.c \ - totger.c index.c getconf.c usage.c decomp.c ip2name.c \ + index.c getconf.c usage.c decomp.c ip2name.c \ useragent.c exclude.c convlog.c totday.c repday.c datafile.c\ indexonly.c splitlog.c lastlog.c topsites.c siteuser.c css.c \ 
smartfilter.c denied.c authfail.c charset.c \ diff --git a/email.c b/email.c index a0dbe61..886b447 100644 --- a/email.c +++ b/email.c @@ -27,6 +27,10 @@ #include "include/conf.h" #include "include/defs.h" +#ifdef ENABLE_DOUBLE_CHECK_DATA +extern struct globalstatstruct globstat; +#endif + int geramail(const char *dirname, int debug, const char *outdir, const char *email, const char *TempDir) { FILE *fp_in, *fp_top1, *fp_top2, *fp_top3; @@ -119,6 +123,13 @@ int geramail(const char *dirname, int debug, const char *outdir, const char *ema fclose(fp_top2); +#ifdef ENABLE_DOUBLE_CHECK_DATA + if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttnelap!=globstat.elap) { + debuga(_("Total statistics mismatch when reading %s to produce the email report\n"),wger); + exit(EXIT_FAILURE); + } +#endif + sprintf(csort,"sort -n -T \"%s\" -r -k 2,2 -o \"%s\" \"%s\"", TempDir, top1, top2); cstatus=system(csort); if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { diff --git a/html.c b/html.c index 5c4ba85..0f7e5ba 100644 --- a/html.c +++ b/html.c @@ -27,6 +27,8 @@ #include "include/conf.h" #include "include/defs.h" +extern struct globalstatstruct globstat; + void htmlrel(void) { FILE *fp_in, *fp_ou, *fp_ip, *fp_ip2, *fp_usr; @@ -57,7 +59,6 @@ void htmlrel(void) char siteind[MAX_TRUNCATED_URL]; struct getwordstruct gwarea; longline line,line1; - struct generalitemstruct item; const struct userinfostruct *uinfo; userscan uscan; @@ -69,28 +70,9 @@ void htmlrel(void) strcpy(tmp3,tmp); strcat(tmp3,"/sargtmp.log"); - snprintf(arqper,sizeof(arqper),"%s/sarg-general",outdirname); - if ((fp_in = fopen(arqper, "r")) == 0){ - debuga(_("(html2) Cannot open file %s\n"),arqper); - exit(EXIT_FAILURE); - } - if ((line=longline_create())==NULL) { - debuga(_("Not enough memory to read file %s\n"),arqper); - exit(EXIT_FAILURE); - } - ttnacc=0; - totbytes=0; - totelap=0; - while((buf=longline_read(fp_in,line))!=NULL) { - ger_read(buf,&item,arqper); - if(item.total) { - ttnacc+=item.nacc; - 
totbytes+=item.nbytes; - totelap+=item.nelap; - } - } - fclose(fp_in); - longline_destroy(&line); + tnacc=globstat.nacc; + totbytes=globstat.nbytes; + totelap=globstat.elap; snprintf(arqper,sizeof(arqper),"%s/sarg-users",outdirname); if ((fp_in = fopen(arqper, "r")) == 0){ diff --git a/include/defs.h b/include/defs.h index f1de44a..9892c84 100755 --- a/include/defs.h +++ b/include/defs.h @@ -56,6 +56,22 @@ struct userinfostruct //! Scan through the known users. typedef struct userscanstruct *userscan; +/*! \brief Global statistics +*/ +struct globalstatstruct +{ + //! Total number of accesses. + long long int nacc; + //! Total number of bytes. + long long int nbytes; + //! Total time spent processing the requests. + long long int elap; + //! Amount of data fetched from the cache. + long long int incache; + //! Amount of data not fetched from the cache. + long long int oucache; +}; + // auth.c void htaccess(const struct userinfostruct *uinfo); @@ -143,6 +159,7 @@ void report_day(const struct userinfostruct *user); // report.c void gerarel(void); int ger_read(char *buffer,struct generalitemstruct *item,const char *filename); +void totalger(FILE *fp_gen,const char *filename); // siteuser.c void siteuser(void); @@ -173,9 +190,6 @@ void topuser(void); // totday.c void day_totalize(const char *tmp, const struct userinfostruct *uinfo, int indexonly); -// totger.c -int totalger(const char *dirname, int debug, const char *outdir); - // usage.c void usage(const char *prog); diff --git a/include/info.h b/include/info.h index f826174..f74bc7f 100755 --- a/include/info.h +++ b/include/info.h @@ -1,3 +1,3 @@ -#define VERSION PACKAGE_VERSION" Dec-26-2010" +#define VERSION PACKAGE_VERSION" Dec-27-2010" #define PGM PACKAGE_NAME #define URL "http://sarg.sourceforge.net" diff --git a/report.c b/report.c index 01b1dbc..fdc350c 100644 --- a/report.c +++ b/report.c @@ -27,6 +27,9 @@ #include "include/conf.h" #include "include/defs.h" +//! The global statistics of the whole log read. 
+struct globalstatstruct globstat; + static FILE *fp_tt=NULL; static void maketmp(const char *user, const char *dirname, int debug, int indexonly); @@ -34,7 +37,7 @@ static void maketmp_hour(const char *user, const char *dirname, int indexonly); static void gravatmp_hora(const char *dirname, const struct userinfostruct *uinfo, const char *data, const char *hora, long long int elap, long long int accbytes, int indexonly); static void gravatmp(const struct userinfostruct *uinfo, const char *oldurl, long long int nacc, long long int nbytes, const char *oldmsg, long long int nelap, int indexonly, long long int incache, long long int oucache); static void gravaporuser(const struct userinfostruct *uinfo, const char *dirname, const char *url, const char *ip, const char *data, const char *hora, long long int tam, long long int elap, int indexonly); -static void gravager(FILE *fp_gen, const struct userinfostruct *uinfo, long long int nacc, const char *url, long long int nbytes, const char *ip, const char *hora, const char *dia, long long int nelap, long long int incache, long long int oucache); +static void gravager(FILE *fp_gen,const char *filename, const struct userinfostruct *uinfo, long long int nacc, const char *url, long long int nbytes, const char *ip, const char *hora, const char *dia, long long int nelap, long long int incache, long long int oucache); static void grava_SmartFilter(const char *dirname, const char *user, const char *ip, const char *data, const char *hora, const char *url, const char *smart); void gerarel(void) @@ -81,6 +84,7 @@ void gerarel(void) ipantes[0]='\0'; smartfilter=0; + memset(&globstat,0,sizeof(globstat)); if (vrfydir(&period, addr, site, us, email)<0) { debuga(_("Cannot create the output directory name containing the period as part of the name\n")); @@ -208,7 +212,7 @@ void gerarel(void) else strcpy(oldmsg,"OK"); gravatmp(puinfo,oldurl,nacc,nbytes,oldmsg,nelap,indexonly,incache,oucache); - 
gravager(fp_gen,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); + gravager(fp_gen,wdirname,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); nacc=0; nbytes=0; nelap=0; @@ -222,7 +226,7 @@ void gerarel(void) else strcpy(oldmsg,"OK"); gravatmp(puinfo,oldurl,nacc,nbytes,oldmsg,nelap,indexonly,incache,oucache); - gravager(fp_gen,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); + gravager(fp_gen,wdirname,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); nacc=0; nbytes=0; nelap=0; @@ -336,16 +340,15 @@ void gerarel(void) else strcpy(oldmsg,"OK"); gravatmp(puinfo,oldurl,nacc,nbytes,oldmsg,nelap,indexonly,incache,oucache); - gravager(fp_gen,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); + gravager(fp_gen,wdirname,puinfo,nacc,oldurl,nbytes,oldaccip,oldacchora,oldaccdia,nelap,incache,oucache); free(oldurl); } + totalger(fp_gen,wdirname); fclose(fp_gen); if (puinfo) day_totalize(tmp,puinfo,indexonly); tmpsort(); - totalger(outdirname, debug, outdir); - if(email[0] == '\0') { if((ReportType & REPORT_TYPE_DOWNLOADS) != 0) download_report(); @@ -540,16 +543,40 @@ static void gravaporuser(const struct userinfostruct *uinfo, const char *dirname } -static void gravager(FILE *fp_gen, const struct userinfostruct *uinfo, long long int nacc, const char *url, long long int nbytes, const char *ip, const char *hora, const char *dia, long long int nelap, long long int incache, long long int oucache) +static void gravager(FILE *fp_gen,const char *filename, const struct userinfostruct *uinfo, long long int nacc, const char *url, long long int nbytes, const char *ip, const char *hora, const char *dia, long long int nelap, long long int incache, long long int oucache) { /* This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable to print a long long int unless it is exactly 64-bits long. 
*/ - fprintf(fp_gen,"%s\t%"PRIu64"\t%"PRIu64"\t%s\t%s\t%s\t%s\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",uinfo->id,(uint64_t)nacc,(uint64_t)nbytes,url,ip,hora,dia,(uint64_t)nelap,(uint64_t)incache,(uint64_t)oucache); + if (fprintf(fp_gen,"%s\t%"PRIu64"\t%"PRIu64"\t%s\t%s\t%s\t%s\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",uinfo->id,(uint64_t)nacc,(uint64_t)nbytes,url,ip,hora,dia,(uint64_t)nelap,(uint64_t)incache,(uint64_t)oucache)<0) { + debuga(_("Failed to write a line in %s\n"),filename); + exit(EXIT_FAILURE); + } + + globstat.nacc+=nacc; + globstat.nbytes+=nbytes; + globstat.elap+=nelap; + globstat.incache+=incache; + globstat.oucache+=oucache; return; } +/*! +Write the total line at the end of the general file. +*/ +void totalger(FILE *fp_gen,const char *filename) +{ + /* + This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable + to print a long long int unless it is exactly 64-bits long. + */ + if (fprintf(fp_gen,"TOTAL\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",(uint64_t)globstat.nacc,(uint64_t)globstat.nbytes,(uint64_t)globstat.elap,(uint64_t)globstat.incache,(uint64_t)globstat.oucache)<0) { + debuga(_("Failed to write the total line in %s\n"),filename); + exit(EXIT_FAILURE); + } +} + int ger_read(char *buffer,struct generalitemstruct *item,const char *filename) { int i; diff --git a/topsites.c b/topsites.c index 950efdb..7bc54a0 100644 --- a/topsites.c +++ b/topsites.c @@ -27,6 +27,10 @@ #include "include/conf.h" #include "include/defs.h" +#ifdef ENABLE_DOUBLE_CHECK_DATA +extern struct globalstatstruct globstat; +#endif + void topsites(void) { FILE *fp_in, *fp_ou; @@ -49,6 +53,11 @@ void topsites(void) long long int tnbytes=0; long long int tntime=0; long long int twork1=0, twork2=0, twork3=0; +#ifdef ENABLE_DOUBLE_CHECK_DATA + long long int ttnacc=0; + long long int ttnbytes=0; + long long int ttntime=0; +#endif int regs=0; int cstatus; int url_len; @@ -136,12 +145,13 @@ void topsites(void) tnacc+=item.nacc; 
tnbytes+=item.nbytes; tntime+=item.nelap; +#ifdef ENABLE_DOUBLE_CHECK_DATA + ttnacc+=item.nacc; + ttnbytes+=item.nbytes; + ttntime+=item.nelap; +#endif } fclose(fp_in); - if (unlink(general2)) { - debuga(_("Cannot delete %s - %s\n"),general2,strerror(errno)); - exit(EXIT_FAILURE); - } longline_destroy(&line); if (ourl) { @@ -155,6 +165,18 @@ void topsites(void) fclose(fp_ou); +#ifdef ENABLE_DOUBLE_CHECK_DATA + if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttntime!=globstat.elap) { + debuga(_("Total statistics mismatch when reading %s to produce the top sites\n"),general2); + exit(EXIT_FAILURE); + } +#endif + + if (unlink(general2)) { + debuga(_("Cannot delete %s - %s\n"),general2,strerror(errno)); + exit(EXIT_FAILURE); + } + if((TopsitesSort & TOPSITE_SORT_CONNECT) != 0) { sortf="-k 1,1 -k 2,2"; } else if((TopsitesSort & TOPSITE_SORT_BYTES) != 0) { diff --git a/topuser.c b/topuser.c index 01a32d7..9e7d09b 100644 --- a/topuser.c +++ b/topuser.c @@ -27,6 +27,10 @@ #include "include/conf.h" #include "include/defs.h" +#ifdef ENABLE_DOUBLE_CHECK_DATA +extern struct globalstatstruct globstat; +#endif + void topuser(void) { FILE *fp_in = NULL, *fp_ou = NULL, *fp_top1 = NULL, *fp_top2 = NULL, *fp_top3 = NULL; @@ -133,6 +137,14 @@ void topuser(void) } fclose(fp_top2); +#ifdef ENABLE_DOUBLE_CHECK_DATA + if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttnelap!=globstat.elap || + ttnincache!=globstat.incache || ttnoucache!=globstat.oucache) { + debuga(_("Total statistics mismatch when reading %s to produce the top users\n"),wger); + exit(EXIT_FAILURE); + } +#endif + if((TopuserSort & TOPUSER_SORT_USER) != 0) { sfield="-k 1,1"; sort_field=_("user"); diff --git a/totger.c b/totger.c deleted file mode 100644 index 2127394..0000000 --- a/totger.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * SARG Squid Analysis Report Generator http://sarg.sourceforge.net - * 1998, 2010 - * - * SARG donations: - * please look at http://sarg.sourceforge.net/donations.php - * Support: 
- * http://sourceforge.net/projects/sarg/forums/forum/363374 - * --------------------------------------------------------------------- - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. - * - */ - -#include "include/conf.h" -#include "include/defs.h" - -int totalger(const char *dirname, int debug, const char *outdir) -{ - FILE *fp_in; - long long int tnacc=0; - long long int tnbytes=0; - long long int telap=0; - long long int tincache=0, toucache=0; - char wger[MAXLEN]; - char *warea; - longline line; - struct generalitemstruct item; - - snprintf(wger,sizeof(wger),"%s/sarg-general",dirname); - if((fp_in=fopen(wger,"r+"))==NULL) { - debuga(_("(totger) Cannot open file %s\n"),wger); - exit(EXIT_FAILURE); - } - - if ((line=longline_create())==NULL) { - debuga(_("Not enough memory to read the temporary file %s\n"),wger); - exit(EXIT_FAILURE); - } - - while((warea=longline_read(fp_in,line))!=NULL) - { - ger_read(warea,&item,wger); - tnacc+=item.nacc; - tnbytes+=item.nbytes; - telap+=item.nelap; - tincache+=item.incache; - toucache+=item.oucache; - } - - longline_destroy(&line); - - if (fseek(fp_in,0,SEEK_END)==-1) { - debuga(_("Failed to move to the end of %s - %s\n"),wger,strerror(errno)); - exit(EXIT_FAILURE); - } - - /* - This complicated printf is due to Microsoft's inability to comply with any standard. 
Msvcrt is unable - to print a long long int unless it is exactly 64-bits long. - */ - if (fprintf(fp_in,"TOTAL\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)telap,(uint64_t)tincache,(uint64_t)toucache)<0) { - debuga(_("Failed to write the total line in %s\n"),wger); - exit(EXIT_FAILURE); - } - if (fclose(fp_in)==EOF) { - debuga(_("Failed to close file %s - %s\n"),wger,strerror(errno)); - exit(EXIT_FAILURE); - } - - return (0); -} diff --git a/util.c b/util.c index 750017a..2807460 100644 --- a/util.c +++ b/util.c @@ -1281,10 +1281,8 @@ char *strup(char *string) void removetmp(const char *outdir) { - FILE *fp_in; - char warea[256]; - char buf[MAXLEN]; - long pos; + FILE *fp_gen; + char filename[256]; if(!RemoveTempFiles) return; @@ -1292,38 +1290,19 @@ void removetmp(const char *outdir) if(debug) { debuga(_("Purging temporary file sarg-general\n")); } - if (snprintf(warea,sizeof(warea),"%s/sarg-general",outdir)>=sizeof(warea)) { + if (snprintf(filename,sizeof(filename),"%s/sarg-general",outdir)>=sizeof(filename)) { debuga(_("(removetmp) directory too long to remove %s/sarg-period\n"),outdir); exit(EXIT_FAILURE); } - if((fp_in=fopen(warea,"r+"))==NULL){ - debuga(_("(removetmp) Cannot open file %s\n"),warea); - exit(EXIT_FAILURE); - } - while(fgets(buf,sizeof(buf),fp_in)!=NULL) { - if(strncmp(buf,"TOTAL",5) == 0 && (buf[6]=='\t' || buf[6]==' ')) - break; - } - if (fseek(fp_in,0,SEEK_SET)==-1) { - debuga(_("Failed to rewind to the beginning of the file %s: %s\n"),warea,strerror(errno)); - exit(EXIT_FAILURE); - } - - if (fputs(buf,fp_in)==EOF) { - debuga(_("Failed to write the total line in %s - %s\n"),warea,strerror(errno)); - exit(EXIT_FAILURE); - } - pos=ftell(fp_in); - if (pos>0 && ftruncate(fileno(fp_in),pos)==-1) { - debuga(_("Failed to truncate %s: %s\n"),warea,strerror(errno)); + if((fp_gen=fopen(filename,"w"))==NULL){ + debuga(_("(removetmp) Cannot open file %s\n"),filename); exit(EXIT_FAILURE); } - if 
(fclose(fp_in)==EOF) { - debuga(_("Failed to close %s after writing the total line - %s\n"),warea,strerror(errno)); + totalger(fp_gen,filename); + if (fclose(fp_gen)==EOF) { + debuga(_("Failed to close %s after writing the total line - %s\n"),filename,strerror(errno)); exit(EXIT_FAILURE); } - - return; } void load_excludecodes(const char *ExcludeCodes)