X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=topsites.c;h=5992cef520e38c3b672b299095ffddff01137106;hb=2c0584166ffce932276910a4e1ad058f7948f879;hp=e91bcc1d021da47111d814a73256700469bb3146;hpb=05b90947b85a3b181c59fe1c2e88d61229d436fa;p=thirdparty%2Fsarg.git diff --git a/topsites.c b/topsites.c index e91bcc1..5992cef 100644 --- a/topsites.c +++ b/topsites.c @@ -1,10 +1,11 @@ /* - * AUTHOR: Pedro Lineu Orso pedro.orso@gmail.com - * 1998, 2008 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net + * 1998, 2015 * * SARG donations: * please look at http://sarg.sourceforge.net/donations.php + * Support: + * http://sourceforge.net/projects/sarg/forums/forum/363374 * --------------------------------------------------------------------- * * This program is free software; you can redistribute it and/or modify @@ -26,264 +27,311 @@ #include "include/conf.h" #include "include/defs.h" +#ifdef ENABLE_DOUBLE_CHECK_DATA +extern struct globalstatstruct globstat; +#endif + void topsites(void) { - - FILE *fp_in, *fp_ou; - - char url[MAXLEN]; - char ourl[MAXLEN]; - char nacc[20]; - char nbytes[20]; - char ntime[20]; - char ntemp[255]; - char ttnacc[20]; - char ttnbytes[20]; - char ttntime[20]; - char csort[255]; - char general[MAXLEN]; - char general2[MAXLEN]; - char general3[MAXLEN]; - char per[MAXLEN]; - char sites[MAXLEN]; - char report[MAXLEN]; - char period[100]; - char sortf[10]; - char sortt[10]; - long long int tnacc=0; - long long int tnbytes=0; - long long int tntime=0; - long long int twork1=0, twork2=0, twork3=0; - int regs=0; - int cstatus; - - if(strcmp(Privacy,"yes") == 0) - return; - - sprintf(general,"%s/sarg-general",dirname); - sprintf(sites,"%s/sarg-sites",dirname); - sprintf(general2,"%s/sarg-general2",dirname); - sprintf(general3,"%s/sarg-general3",dirname); - sprintf(per,"%s/sarg-period",dirname); - - if (strstr(ReportType,"topusers") == 0) - sprintf(report,"%s/index.html",dirname); - else - sprintf(report,"%s/topsites.html",dirname); - - if ((fp_in = fopen(per, "r")) == 0) { - fprintf(stderr, "SARG: (topuser) %s: %s\n",text[45],per); - exit(1); - } - - if (!fgets(period,sizeof(period),fp_in)) { - fprintf(stderr,"SARG: (topuser) read error in %s\n",per); - exit(1); - } - fclose(fp_in); - - sprintf(csort,"sort -k 4,4 -o \"%s\" \"%s\"",general2,general); - cstatus=system(csort); - if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { - fprintf(stderr, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus)); - fprintf(stderr, "SARG: sort command: %s\n",csort); - exit(1); - } - - if((fp_in=fopen(general2,"r"))==NULL) { - fprintf(stderr, "SARG: (topsite) %s: %s\n",text[8],general2); - fprintf(stderr, "SARG: sort command: %s\n",csort); - exit(1); - } - - if((fp_ou=fopen(general3,"w"))==NULL) { - fprintf(stderr, "SARG: (topsite) %s: %s\n",text[8],general3); - exit(1); - } - - while(fgets(buf,sizeof(buf),fp_in)!=NULL) { - if (getword(url,sizeof(url),buf,' ')<0) { - printf("SARG: Maybe you have an invalid user in your %s file of the topsites.\n",general2); - exit(1); - } - if(strcmp(url,"TOTAL") == 0) { - if (getword(ttnacc,sizeof(ttnacc),buf,' ')<0) { - printf("SARG: Maybe you have an invalid total number of access in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ttnbytes,sizeof(ttnbytes),buf,' ')<0) { - printf("SARG: Maybe you have an invalid total number of bytes in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ttntime,sizeof(ttntime),buf,' ')<0) { - printf("SARG: Maybe you have an invalid total time in your %s file of the topsites.\n",general2); - exit(1); - } - continue; - } - if (getword(nacc,sizeof(nacc),buf,' ')<0) { - printf("SARG: Maybe you have an invalid number of access in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(nbytes,sizeof(nbytes),buf,' ')<0) { - printf("SARG: Maybe you have an invalid number of bytes in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(url,sizeof(url),buf,' ')<0) { - printf("SARG: Maybe you have an invalid url in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ntemp,sizeof(ntemp),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in column 5 in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ntemp,sizeof(ntemp),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in column 6 in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ntemp,sizeof(ntemp),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in column 7 in your %s file of the topsites.\n",general2); - exit(1); - } - if (getword(ntime,sizeof(ntime),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in column 8 in your %s file of the topsites.\n",general2); - exit(1); - } - - if(!regs) { - strcpy(ourl,url); - regs++; - } - - if(strcmp(url,ourl) != 0) { - my_lltoa(tnacc,val1,15); - my_lltoa(tnbytes,val2,15); - my_lltoa(tntime,val3,15); - fprintf(fp_ou,"%s %s %s %s\n",val1,val2,val3,ourl); - strcpy(ourl,url); - tnacc=0; - tnbytes=0; - tntime=0; - } - - tnacc+=my_atoll(nacc); - tnbytes+=my_atoll(nbytes); - tntime+=my_atoll(ntime); - } - - my_lltoa(tnacc,val1,15); - my_lltoa(tnbytes,val2,15); - my_lltoa(tntime,val3,15); - fprintf(fp_ou,"%s %s %s %s\n",val1,val2,val3,ourl); - - fclose(fp_in); - fclose(fp_ou); - unlink(general2); - - strlow(TopsitesSortField); - strlow(TopsitesSortType); - - if(strcmp(TopsitesSortField,"connect") == 0) - strcpy(sortf,"1,1"); - if(strcmp(TopsitesSortField,"bytes") == 0) - strcpy(sortf,"2,2"); - if(strcmp(TopsitesSortType,"a") == 0) - strcpy(sortt," "); - if(strcmp(TopsitesSortType,"d") == 0) - strcpy(sortt,"-r"); - - sprintf(csort,"sort %s -k %s -o \"%s\" \"%s\"",sortt,sortf,sites,general3); - cstatus=system(csort); - if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { - fprintf(stderr, "SARG: sort command return status %d\n",WEXITSTATUS(cstatus)); - fprintf(stderr, "SARG: sort command: %s\n",csort); - exit(1); - } - if((fp_in=fopen(sites,"r"))==NULL) { - fprintf(stderr, "SARG: (topsite) %s: %s\n",text[8],sites); - fprintf(stderr, "SARG: sort command: %s\n",csort); - exit(1); - } - - unlink(general2); - unlink(general3); - - if((fp_ou=fopen(report,"w"))==NULL) { - fprintf(stderr, "SARG: (topsite) %s: %s\n",text[8],report); - exit(1); - } - - regs=0; - - fprintf(fp_ou, "\n\n\n \n",CharSet); - css(fp_ou); - fputs("\n",fp_ou); - fprintf(fp_ou,"\n",BgColor,TxColor,BgImage); - write_logo_image(fp_ou); - - if(strcmp(IndexTree,"date") == 0) - show_sarg(fp_ou,"../../.."); - else - show_sarg(fp_ou, ".."); - - fputs("
\n",fp_ou); - fprintf(fp_ou,"\n",Title); - - fprintf(fp_ou,"\n",text[89],period); - fprintf(fp_ou,"\n",text[83],TopSitesNum,text[84]); - fputs("
%s
%s: %s
%s %s %s
\n",fp_ou); - - fputs("
\n",fp_ou); - fputs("\n",fp_ou); - fprintf(fp_ou," \ - \ - \n", \ - text[100],text[91],text[92],text[93],text[99]); - - regs=1; - ntopsites = 0; - - while(fgets(buf,sizeof(buf),fp_in)!=NULL) { - if(regs>atoi(TopSitesNum)) - break; - if (getword(nacc,sizeof(nacc),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",sites); - exit(1); - } - if (atoi(nacc) == 0) continue; - if (getword(nbytes,sizeof(nbytes),buf,' ')<0 || - getword(ntime,sizeof(ntime),buf,' ')<0 || getword(url,sizeof(url),buf,' ')<0) { - printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",sites); - exit(1); - } - - twork1=my_atoll(nacc); - twork2=my_atoll(nbytes); - twork3=my_atoll(ntime); - - strcpy(wwork1,fixnum(twork1,1)); - strcpy(wwork2,fixnum(twork2,1)); - strcpy(wwork3,fixtime(twork3)); - - if(strlen(BlockIt) > 0) - sprintf(BlockImage," ",wwwDocumentRoot,BlockIt,url); - else BlockImage[0]='\0'; - - - fprintf(fp_ou,"\n",regs,BlockImage,url,url,wwork1,wwork2,wwork3); - regs++; - } - - - fputs("
%s%s%s%s%s
%d%s%s%s%s%s
\n
\n",fp_ou); - - show_info(fp_ou); - - fputs("\n\n",fp_ou); - - fclose(fp_in); - fclose(fp_ou); - - return; - + FileObject *fp_in; + FILE *fp_ou; + + char *buf; + char *url; + char *ourl=NULL; + char csort[4096]; + char general[MAXLEN]; + char general2[MAXLEN]; + char general3[MAXLEN]; + char sites[MAXLEN]; + char report[MAXLEN]; + char ouser[MAX_USER_LEN]=""; + const char *sortf; + const char *sortt; + long long int nacc; + long long int nbytes; + long long int ntime; + long long int tnacc=0; + long long int tnbytes=0; + long long int tntime=0; + long long int twork1=0, twork2=0, twork3=0; +#ifdef ENABLE_DOUBLE_CHECK_DATA + long long int ttnacc=0; + long long int ttnbytes=0; + long long int ttntime=0; +#endif + int nusers=0; + int regs=0; + int cstatus; + int url_len; + int ourl_size=0; + struct getwordstruct gwarea; + longline line; + struct generalitemstruct item; + + if(Privacy) { + if (debugz>=LogLevel_Process) debugaz(__FILE__,__LINE__,_("Top sites report not produced because privacy option is on\n")); + return; + } + if (debugz>=LogLevel_Process) + debuga(__FILE__,__LINE__,_("Creating top sites report...\n")); + + sprintf(general,"%s/sarg-general",outdirname); + sprintf(sites,"%s/sarg-sites",outdirname); + sprintf(general2,"%s/sarg-general2",outdirname); + sprintf(general3,"%s/sarg-general3",outdirname); + + sprintf(report,"%s/topsites.html",outdirname); + + if (snprintf(csort,sizeof(csort),"sort -t \"\t\" -k 4,4 -k 1,1 -o \"%s\" \"%s\"",general2,general)>=sizeof(csort)) { + debuga(__FILE__,__LINE__,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),general,general2); + exit(EXIT_FAILURE); + } + cstatus=system(csort); + if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { + debuga(__FILE__,__LINE__,_("sort command return status %d\n"),WEXITSTATUS(cstatus)); + debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort); + exit(EXIT_FAILURE); + } + + if((fp_in=FileObject_Open(general2))==NULL) { + debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),general2,FileObject_GetLastOpenError()); + debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort); + exit(EXIT_FAILURE); + } + + if((fp_ou=fopen(general3,"w"))==NULL) { + debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),general3,strerror(errno)); + exit(EXIT_FAILURE); + } + + if ((line=longline_create())==NULL) { + debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),general2); + exit(EXIT_FAILURE); + } + + while((buf=longline_read(fp_in,line))!=NULL) { + ger_read(buf,&item,general2); + if(item.total) continue; + + if(!regs) { + url_len=strlen(item.url); + if (!ourl || url_len>=ourl_size) { + ourl_size=url_len+1; + ourl=realloc(ourl,ourl_size); + if (!ourl) { + debuga(__FILE__,__LINE__,_("Not enough memory to store the url\n")); + exit(EXIT_FAILURE); + } + } + strcpy(ourl,item.url); + regs++; + } + + if(strcmp(item.url,ourl) != 0) { + /* + This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable + to print a long long int unless it is exactly 64-bits long. + */ + fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%d\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,nusers,ourl); + url_len=strlen(item.url); + if (url_len>=ourl_size) { + ourl_size=url_len+1; + ourl=realloc(ourl,ourl_size); + if (!ourl) { + debuga(__FILE__,__LINE__,_("Not enough memory to store the url\n")); + exit(EXIT_FAILURE); + } + } + strcpy(ourl,item.url); + strcpy(ouser,item.user); + tnacc=0; + tnbytes=0; + tntime=0; + nusers=1; + } else if (strcmp(item.user,ouser)!=0) { + strcpy(ouser,item.user); + nusers++; + } + + tnacc+=item.nacc; + tnbytes+=item.nbytes; + tntime+=item.nelap; +#ifdef ENABLE_DOUBLE_CHECK_DATA + ttnacc+=item.nacc; + ttnbytes+=item.nbytes; + ttntime+=item.nelap; +#endif + } + if (FileObject_Close(fp_in)) { + debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),general2,FileObject_GetLastCloseError()); + exit(EXIT_FAILURE); + } + longline_destroy(&line); + + if (ourl) { + /* + This complicated printf is due to Microsoft's inability to comply with any standard. Msvcrt is unable + to print a long long int unless it is exactly 64-bits long. + */ + fprintf(fp_ou,"%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%d\t%s\n",(uint64_t)tnacc,(uint64_t)tnbytes,(uint64_t)tntime,nusers,ourl); + free(ourl); + } + + if (fclose(fp_ou)==EOF) { + debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),general3,strerror(errno)); + exit(EXIT_FAILURE); + } + +#ifdef ENABLE_DOUBLE_CHECK_DATA + if (ttnacc!=globstat.nacc || ttnbytes!=globstat.nbytes || ttntime!=globstat.elap) { + debuga(__FILE__,__LINE__,_("Total statistics mismatch when reading \"%s\" to produce the top sites\n"),general2); + exit(EXIT_FAILURE); + } +#endif + + if (!KeepTempLog && unlink(general2)) { + debuga(__FILE__,__LINE__,_("Cannot delete \"%s\": %s\n"),general2,strerror(errno)); + exit(EXIT_FAILURE); + } + + if((TopsitesSort & TOPSITE_SORT_CONNECT) != 0) { + sortf="-k 1,1 -k 2,2"; + } else if((TopsitesSort & TOPSITE_SORT_BYTES) != 0) { + sortf="-k 2,2 -k 1,1"; + } else if((TopsitesSort & TOPSITE_SORT_TIME) != 0) { + sortf="-k 3,3"; + } else if((TopsitesSort & TOPSITE_SORT_USER) != 0) { + sortf="-k 4,4 -k 1,1 -k 2,2"; + } else { + sortf="-k 2,2 -k 1,1"; //default is BYTES + } + if((TopsitesSort & TOPSITE_SORT_REVERSE) != 0) { + sortt="-r"; + } else { + sortt=""; + } + + if (snprintf(csort,sizeof(csort),"sort -t \"\t\" %s -n %s -o \"%s\" \"%s\"",sortt,sortf,sites,general3)>=sizeof(csort)) { + debuga(__FILE__,__LINE__,_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),general3,sites); + exit(EXIT_FAILURE); + } + cstatus=system(csort); + if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { + debuga(__FILE__,__LINE__,_("sort command return status %d\n"),WEXITSTATUS(cstatus)); + debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort); + exit(EXIT_FAILURE); + } + if((fp_in=FileObject_Open(sites))==NULL) { + debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),sites,FileObject_GetLastOpenError()); + debuga(__FILE__,__LINE__,_("sort command: %s\n"),csort); + exit(EXIT_FAILURE); + } + + if (!KeepTempLog && unlink(general3)) { + debuga(__FILE__,__LINE__,_("Cannot delete \"%s\": %s\n"),general3,strerror(errno)); + exit(EXIT_FAILURE); + } + + if((fp_ou=fopen(report,"w"))==NULL) { + debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),report,strerror(errno)); + exit(EXIT_FAILURE); + } + + write_html_header(fp_ou,(IndexTree == INDEX_TREE_DATE) ? 3 : 1,_("Top sites"),HTML_JS_SORTTABLE); + fputs("",fp_ou); + fprintf(fp_ou,_("Period: %s"),period.html); + fputs("\n",fp_ou); + fputs("",fp_ou); + fprintf(fp_ou,_("Top %d sites"),TopSitesNum); + fputs("\n",fp_ou); + close_html_header(fp_ou); + + fputs("
\n",fp_ou); + fprintf(fp_ou,"\n", + /* TRANSLATORS: This is a column header showing the URL of the visited sites. */ + _("ACCESSED SITE"), + /* TRANSLATORS: This is a column header showing the number of connections to a visited site. */ + _("CONNECT"), + /* TRANSLATORS: This is a column header showing the number of transfered bytes. */ + _("BYTES"), + /* TRANSLATORS: This is a column header showing the time spent by the proxy processing the requests. */ + pgettext("duration","TIME"), + /* TRANSLATORS: This is a column header showing the number of users who visited a sites. */ + _("USERS")); + + regs=0; + ntopsites = 0; + + if ((line=longline_create())==NULL) { + debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),sites); + exit(EXIT_FAILURE); + } + + while(regs",fixnum(twork1,1)); + fputs("",fixnum(twork2,1)); + fputs("",fixtime(twork3)); + fputs("\n",fixnum(nusers,1)); + } + if (FileObject_Close(fp_in)) { + debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),sites,FileObject_GetLastCloseError()); + exit(EXIT_FAILURE); + } + longline_destroy(&line); + + fputs("
%s%s%s%s%s%s
%d",++regs); + + if(BlockIt[0] != '\0' && url[0]!=ALIAS_PREFIX) { + fprintf(fp_ou," ",fp_ou); + } + + output_html_link(fp_ou,url,100); + fputs("%s%s%s%s
\n",fp_ou); + write_html_trailer(fp_ou); + if (fclose(fp_ou)==EOF) { + debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),report,strerror(errno)); + exit(EXIT_FAILURE); + } + + return; }