From: Frédéric Marchal Date: Fri, 22 Jun 2012 19:37:33 +0000 (+0200) Subject: Move the log reading to a separate source file X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=27d1fa35072fa2c092a5a4546d51b126d42dd548;p=thirdparty%2Fsarg.git Move the log reading to a separate source file First stage to factorize the huge main function. --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 4773043..5851233 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,7 @@ SET(SRC util.c log.c report.c topuser.c email.c sort.c html.c smartfilter.c denied.c authfail.c charset.c dichotomic.c redirector.c auth.c download.c grepday.c ip2name_exec.c dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c - usertab.c userinfo.c longline.c url.c) + usertab.c userinfo.c longline.c url.c readlog.c) FOREACH(f ${SRC}) ADD_FILE_DEPENDENCIES(${f} ${CMAKE_BINARY_DIR}/config.h ${CMAKE_SOURCE_DIR}/include/conf.h ${CMAKE_SOURCE_DIR}/include/info.h ${CMAKE_SOURCE_DIR}/include/defs.h) diff --git a/Makefile.in b/Makefile.in index 42e7c58..3bbf0b5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -38,7 +38,7 @@ SRCS = util.c log.c report.c topuser.c email.c sort.c html.c \ smartfilter.c denied.c authfail.c charset.c dichotomic.c \ redirector.c auth.c download.c grepday.c ip2name_exec.c \ dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c \ - usertab.c userinfo.c longline.c url.c + usertab.c userinfo.c longline.c url.c readlog.c OBJS = $(SRCS:.c=.o) diff --git a/include/conf.h b/include/conf.h index b87558d..4d8598e 100755 --- a/include/conf.h +++ b/include/conf.h @@ -439,7 +439,6 @@ char SortTableJs[256]; char HostAliasFile[512]; int idate; -int smartfilter; int denied_count; int download_count; int authfail_count; diff --git a/include/defs.h b/include/defs.h index 69acaca..8025388 100755 --- a/include/defs.h +++ b/include/defs.h @@ -87,6 +87,27 @@ struct globalstatstruct //! The object to store the daily statistics. typedef struct DayStruct *DayObject; +/*! 
+\brief Log filtering criterion. +*/ +struct ReadLogDataStruct +{ + //! The filtering date range. + char DateRange[255]; + //! \c True to filter on hosts. + bool HostFilter; + //! \c True to filter on users. + bool UserFilter; + //! Maximum elapsed time allowed. Any time greater than this value is set to zero. + long int max_elapsed; + //! \c True to restrict the log to the system users. + bool SysUsers; + //! The start time to include in the report(H*100+M). Set to -1 to disable. + int StartTime; + //! The end time to include in the report(H*100+M). Set to -1 to disable. + int EndTime; +}; + // auth.c void htaccess(const struct userinfostruct *uinfo); @@ -168,6 +189,9 @@ void longline_destroy(/*@out@*//*@only@*//*@null@*/longline *line_ptr); // index.c void make_index(void); +// readlog.c +int ReadLogFile(struct ReadLogDataStruct *Filter); + // realtime.c void realtime(void); diff --git a/include/readlog.h b/include/readlog.h new file mode 100644 index 0000000..a6bf38e --- /dev/null +++ b/include/readlog.h @@ -0,0 +1,38 @@ +#ifndef READLOG_HEADER +#define READLOG_HEADER + +/*! +\brief Possible return codes for the functions parsing the input log. +*/ +enum ReadLogReturnCodeEnum +{ + //! Line successfully read. + RLRC_NoError, + //! Unknown line format. + RLRC_Unknown, + + RLRC_LastRetCode //!< last entry of the list. +}; + +/*! +\brief Data read from an input log file. +*/ +struct ReadLogStruct +{ + //! The time corresponding to the entry. + struct tm *EntryTime; + //! The IP address connecting to internet + char *Ip; + //! The user's name. + char *User; + //! The URL of the visited site. + char *Url; + //! Time necessary to process the user's request. + unsigned int ElapsedTime; + //! Number of transferred bytes. + unsigned long int DataSize; + //! HTTP code returned to the user for the entry. 
+ char *HttpCode; +}; + +#endif //READLOG_HEADER diff --git a/log.c b/log.c index bcc1aed..6d0a9c0 100644 --- a/log.c +++ b/log.c @@ -26,134 +26,40 @@ #include "include/conf.h" #include "include/defs.h" +#include "include/readlog.h" #ifdef HAVE_GETOPT_H #include #endif -#define REPORT_EVERY_X_LINES 5000 -#define MAX_OPEN_USER_FILES 10 +//! The log file filtering. +struct ReadLogDataStruct ReadFilter; +char denied_sort[MAXLEN]; -struct userfilestruct -{ - struct userfilestruct *next; - struct userinfostruct *user; - FILE *file; -}; - -/*@null@*/static char *userfile=NULL; - -numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; -numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; +//! The list of the system users. +/*@null@*/char *userfile=NULL; static void getusers(const char *pwdfile, int debug); int main(int argc,char *argv[]) { - enum isa_col_id { - ISACOL_Ip, - ISACOL_UserName, - ISACOL_Date, - ISACOL_Time, - ISACOL_TimeTaken, - ISACOL_Bytes, - ISACOL_Uri, - ISACOL_Status, - ISACOL_Last //last entry of the list ! - }; - enum InputLogFormat { - ILF_Unknown, - ILF_Squid, - ILF_Common, - ILF_Sarg, - ILF_Isa, - ILF_Last //last entry of the list ! 
- }; - - FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL; - - char sz_Download_Unsort[ 20000 ] ; - FILE * fp_Download_Unsort = NULL ; - extern int optind; extern int optopt; extern char *optarg; - char data[255]; - char elap[255]; - char ip[MAXLEN]; - char tam[255]; - char fun[MAXLEN]; - char wuser[MAXLEN]; - char smartfilter[MAXLEN]; - char dia[128]; - char mes[30]; - char hora[30]; - char date[255]; - char arq[255]; - char arq_log[255]; - int hm, hmf, hmr; char hm_str[15]; char uagent[MAXLEN]; char hexclude[MAXLEN]; - char csort[MAXLEN]; - int cstatus; - char tbuf2[128]; - char *str; - char tmp3[MAXLEN]; - char denied_unsort[MAXLEN]; - char denied_sort[MAXLEN]; - char authfail_unsort[MAXLEN]; - char start_hour[128]; - char *linebuf; - const char *url; - char *full_url; - char user[MAX_USER_LEN]; char splitprefix[MAXLEN]; - enum InputLogFormat ilf; - int ilf_count[ILF_Last]; int ch; int x; int errflg=0; - int puser=0; - bool fhost=false; bool dns=false; - bool fuser=false; - int idata=0; - int mindate=0; - int maxdate=0; int iarq=0; - int isa_ncols=0,isa_cols[ISACOL_Last]; int lastlog=-1; - long int nbytes; - long int elap_time; - bool from_stdin; - bool from_pipe; - int blen; - int maxopenfiles; - int nopen; - bool id_is_ip; - long totregsl=0; - long totregsg=0; - long totregsx=0; - bool totper=false; - long int max_elapsed=0; - long long int iyear, imonth, iday; + int LogStatus; bool realt; bool userip; - struct tm tt; - struct tm *t; - unsigned long recs1=0UL; - unsigned long recs2=0UL; - int OutputNonZero = REPORT_EVERY_X_LINES ; - bool download_flag=false; - char download_url[MAXLEN]; - struct getwordstruct gwarea; - longline line; - time_t tnum; - struct stat logstat; - struct userinfostruct *uinfo; - struct userfilestruct *first_user_file, *ufile, *ufile1, *prev_ufile; static int split=0; static int convert=0; static int output_css=0; @@ -206,7 +112,6 @@ int main(int argc,char *argv[]) ExternalCSSFile[0]='\0'; 
RedirectorLogFormat[0]='\0'; NRedirectorLogs=0; - for (ilf=0 ; ilftm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday0) { - for (i=0 ; i=sizeof(arq_log)) { - debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); - exit(EXIT_FAILURE); - } - if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { - debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno)); - exit(EXIT_FAILURE); - } - fputs("*** SARG Log ***\n",fp_log); - } - - recs2++; - if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) { - double perc = recs2 * 100. / recs1 ; - printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc); - putchar('\r'); - fflush (stdout); - OutputNonZero = REPORT_EVERY_X_LINES ; - } - if(blen < 58) continue; - if(strstr(linebuf,"HTTP/0.0") != 0) continue; - if(strstr(linebuf,"logfile turned over") != 0) continue; - if(linebuf[0] == ' ') continue; - - // exclude_string - if(ExcludeString[0] != '\0') { - bool exstring=false; - getword_start(&gwarea,ExcludeString); - while(strchr(gwarea.current,':') != 0) { - if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) { - debuga(_("Maybe you have a broken record or garbage in your exclusion string\n")); - exit(EXIT_FAILURE); - } - if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) { - exstring=true; - break; - } - } - if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL ) - exstring=true; - if(exstring) continue; - } - - totregsl++; - if(debugm) - printf("BUF=%s\n",linebuf); - - t=NULL; - if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) { - getword_start(&gwarea,linebuf); - if (getword(data,sizeof(data),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken time in your access.log file\n")); - exit(EXIT_FAILURE); - } - if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) { - strcpy(ip,data); - strcpy(elap,"0"); - if(squid24) { - if (getword(user,sizeof(user),&gwarea,' ')<0 || 
getword_skip(255,&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - } else { - if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - } - if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 || - getword(fun,sizeof(fun),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0) { - debuga(_("Maybe you have a broken url in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_skip(MAXLEN,&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(code2,sizeof(code2),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(tam,sizeof(tam),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) { - if (getword(code,sizeof(code),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - } else { - if (getword(code,sizeof(code),&gwarea,'\0')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - } - - if ((str = strchr(code, ':')) != NULL) - *str = '/'; - - if(strcmp(tam,"\0") == 0) - strcpy(tam,"0"); - - ilf=ILF_Common; - ilf_count[ilf]++; - - getword_start(&gwarea,data+1); - if (getword_multisep(data,sizeof(data),&gwarea,':')<0){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){ - debuga(_("Maybe 
you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - getword_start(&gwarea,data); - if (getword_atoll(&iday,&gwarea,'/')<0){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(mes,sizeof(mes),&gwarea,'/')<0){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_atoll(&iyear,&gwarea,'/')<0){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - - imonth=month2num(mes)+1; - idata=builddia(iday,imonth,iyear); - computedate(iyear,imonth,iday,&tt); - if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || - tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { - debuga(_("Invalid time found in %s\n"),arq); - exit(EXIT_FAILURE); - } - t=&tt; - } - - if(ilf==ILF_Unknown || ilf==ILF_Squid) { - if (getword(elap,sizeof(elap),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0') - if (getword(elap,sizeof(elap),&gwarea,' ')<0) { - debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if(strlen(elap) < 1) continue; - if (getword(ip,sizeof(ip),&gwarea,' ')<0){ - debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(code,sizeof(code),&gwarea,' ')<0){ - debuga(_("Maybe you have a broken result code in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(tam,sizeof(tam),&gwarea,' ')<0){ - debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(fun,sizeof(fun),&gwarea,' ')<0){ - debuga(_("Maybe you have a broken request method in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0){ - debuga(_("Maybe you have a 
broken url in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(user,sizeof(user),&gwarea,' ')<0){ - debuga(_("Maybe you have a broken user ID in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - ilf=ILF_Squid; - ilf_count[ilf]++; - - tnum=atoi(data); - t=localtime(&tnum); - if (t == NULL) { - debuga(_("Cannot convert the timestamp from the squid log file\n")); - exit(EXIT_FAILURE); - } - - strftime(tbuf2, sizeof(tbuf2), "%H%M", t); - - idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday; - } - } - if (ilf==ILF_Sarg) { - getword_start(&gwarea,linebuf); - if (getword(data,sizeof(data),&gwarea,'\t')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(hora,sizeof(hora),&gwarea,'\t')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(user,sizeof(user),&gwarea,'\t')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(ip,sizeof(ip),&gwarea,'\t')<0) { - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_ptr(linebuf,&full_url,&gwarea,'\t')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(tam,sizeof(tam),&gwarea,'\t')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(code,sizeof(code),&gwarea,'\t')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(elap,sizeof(elap),&gwarea,'\t')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } 
- getword_start(&gwarea,data); - if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_atoll(&iyear,&gwarea,'\0')<0){ - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - idata=builddia(iday,imonth,iyear); - computedate(iyear,imonth,iday,&tt); - if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || - tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { - debuga(_("Invalid time found in %s\n"),arq); - exit(EXIT_FAILURE); - } - t=&tt; - } - if (ilf==ILF_Isa) { - if (linebuf[0] == '#') { - int ncols,cols[ISACOL_Last]; - - fixendofline(linebuf); - getword_start(&gwarea,linebuf); - // remove the #Fields: column at the beginning of the line - if (getword_skip(1000,&gwarea,' ')<0){ - debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - for (ncols=0 ; ncols=0) { - isa_ncols=ncols; - for (ncols=0 ; ncols=sizeof(ip)) { - debuga(_("Maybe you have a broken IP in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(ip,str); - } else if (x==isa_cols[ISACOL_UserName]) { - if (strlen(str)>=sizeof(user)) { - debuga(_("Maybe you have a broken user ID in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(user,str); - } else if (x==isa_cols[ISACOL_Date]) { - if (strlen(str)>=sizeof(data)) { - debuga(_("Maybe you have a broken date in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(data,str); - } else if (x==isa_cols[ISACOL_Time]) { - if (strlen(str)>=sizeof(hora)) { - debuga(_("Maybe you have a broken time in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(hora,str); - } else if (x==isa_cols[ISACOL_TimeTaken]) { - if 
(strlen(str)>=sizeof(elap)) { - debuga(_("Maybe you have a broken download duration in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(elap,str); - } else if (x==isa_cols[ISACOL_Bytes]) { - if (strlen(str)>=sizeof(tam)) { - debuga(_("Maybe you have a broken download size in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(tam,str); - } else if (x==isa_cols[ISACOL_Uri]) { - full_url=str; - } else if (x==isa_cols[ISACOL_Status]) { - if (strlen(str)>=sizeof(code)) { - debuga(_("Maybe you have a broken access code in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - strcpy(code,str); - } - } - - if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) { - sprintf(val1,"DENIED/%s",code); - strcpy(code,val1); - } - getword_start(&gwarea,data); - if (getword_atoll(&iyear,&gwarea,'-')<0){ - debuga(_("Maybe you have a broken year in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_atoll(&imonth,&gwarea,'-')<0){ - debuga(_("Maybe you have a broken month in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - if (getword_atoll(&iday,&gwarea,'\0')<0){ - debuga(_("Maybe you have a broken day in your %s file\n"),arq); - exit(EXIT_FAILURE); - } - - idata=builddia(iday,imonth,iyear); - computedate(iyear,imonth,iday,&tt); - if (isa_cols[ISACOL_Time]>=0) { - if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || - tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { - debuga(_("Invalid time found in %s\n"),arq); - exit(EXIT_FAILURE); - } - } - t=&tt; - } - if (t==NULL) { - debuga(_("Unknown input log file format\n")); - break; - } - - strftime(dia, sizeof(dia), "%d/%m/%Y", t); - snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec); - - if(debugm) - printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",date,idata,dfrom,duntil); - - if(date[0] != '\0'){ - if(idata < dfrom || idata > duntil) continue; - } - - // Record only hours usage which is required - 
if (t) { - if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) - continue; - - if( bsearch( &( t -> tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) - continue; - } - - - if(strlen(user) > MAX_USER_LEN) { - if (debugm) printf(_("User ID too long: %s\n"),user); - totregsx++; - continue; - } - - // include_users - if(IncludeUsers[0] != '\0') { - snprintf(val1,sizeof(val1),":%s:",user); - if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) - continue; - } - - if(vercode(code)) { - if (debugm) printf(_("Excluded code: %s\n"),code); - totregsx++; - continue; - } - - if(testvaliduserchar(user)) - continue; - -#if 0 - if((str = strstr(user,"%20")) != NULL) { - /* - This is a patch introduced to solve bug #1624251 reported at sourceforge but - the side effect is to truncate the name at the first space and merge the reports - of people whose name is identical up to the first space. - - The old code used to truncate the user name at the first % if a %20 was - found anywhere in the string. That means the string could be truncated - at the wrong place if another % occured before the %20. This new code should - avoid that problem and only truncate at the space. There is no bug - report indicating that anybody noticed this. - */ - *str='\0'; - } - - /* - Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was - found in the user name. - */ - while((str = strstr(user,"%5c")) != NULL) { - *str='.'; - for (x=3 ; str[x] ; x++) str[x-2]=str[x]; - } -#endif - // replace any tab by a single space - for (str=full_url ; *str ; str++) - if (*str=='\t') *str=' '; - for (str=code ; *str ; str++) - if (*str=='\t') *str=' '; - - if(ilf!=ILF_Sarg) { - /* - The full URL is not saved in sarg log. There is no point in testing the URL to detect - a downloaded file. 
- */ - download_flag=is_download_suffix(full_url); - if (download_flag) { - safe_strcpy(download_url,full_url,sizeof(download_url)); - download_count++; - } - } else - download_flag=false; - - url=process_url(full_url,LongUrl); - if (!url || url[0] == '\0') continue; - - if(addr[0] != '\0'){ - if(strcmp(addr,ip)!=0) continue; - } - if(fhost) { - if(!vhexclude(url)) { - if (debugm) printf(_("Excluded site: %s\n"),url); - totregsx++; - continue; - } - } - - if(hm >= 0 && hmf >= 0) { - hmr=t->tm_hour*100+t->tm_min; - if(hmr < hm || hmr > hmf) continue; - } - - if(site[0] != '\0'){ - if(strstr(url,site)==0) continue; - } - - if(UserIp) { - strcpy(user,ip); - id_is_ip=true; - } else { - id_is_ip=false; - if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) { - if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) { - strcpy(user,ip); - id_is_ip=true; - } - if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) - continue; - if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) - strcpy(user,"everybody"); - } else { - strlow(user); - if(NtlmUserFormat == NTLMUSERFORMAT_USER) { - if((str = strchr(user,'_')) != 0) { - strcpy(warea,str+1); - strcpy(user,warea); - } - if((str = strchr(user,'+')) != 0) { - strcpy(warea,str+1); - strcpy(user,warea); - } - } - } - } - - if(us[0] != '\0'){ - if(strcmp(user,us)!=0) continue; - } - - if(puser) { - snprintf(wuser,sizeof(wuser),":%s:",user); - if(strstr(userfile, wuser) == 0) - continue; - } - - if(fuser) { - if(!vuexclude(user)) { - if (debugm) printf(_("Excluded user: %s\n"),user); - totregsx++; - continue; - } - } - - if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0) - continue; - - nbytes=atol(tam); - if (nbytes<0) nbytes=0; - - elap_time=atol(elap); - if (elap_time<0) elap_time=0; - if(max_elapsed) { - if(elap_time>max_elapsed) { - elap_time=0; - } - } - - if((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) { - fixendofline(str); - 
snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1); - } else strcpy(smartfilter,"\"\""); - - nopen=0; - prev_ufile=NULL; - for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) { - prev_ufile=ufile; - if (ufile->file) nopen++; - } - if (!ufile) { - ufile=malloc(sizeof(*ufile)); - if (!ufile) { - debuga(_("Not enough memory to store the user %s\n"),user); - exit(EXIT_FAILURE); - } - memset(ufile,0,sizeof(*ufile)); - ufile->next=first_user_file; - first_user_file=ufile; - uinfo=userinfo_create(user); - ufile->user=uinfo; - uinfo->id_is_ip=id_is_ip; - } else { - if (prev_ufile) { - prev_ufile->next=ufile->next; - ufile->next=first_user_file; - first_user_file=ufile; - } - } -#ifdef ENABLE_DOUBLE_CHECK_DATA - ufile->user->nbytes+=nbytes; - ufile->user->elap+=elap_time; -#endif - - if (ufile->file==NULL) { - if (nopen>=maxopenfiles) { - x=0; - for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) { - if (ufile1->file!=NULL) { - if (x>=maxopenfiles) { - if (fclose(ufile1->file)==EOF) { - debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno)); - exit(EXIT_FAILURE); - } - ufile1->file=NULL; - } - x++; - } - } - } - if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) { - debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename); - exit(EXIT_FAILURE); - } - if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) { - debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno)); - exit (1); - } - } - - /*if ( strcmp ( user , sz_Last_User ) != 0 ) { - if ( fp_Write_User ) - fclose( fp_Write_User ) ; - sprintf (tmp3, "%s/%s.unsort", tmp, user); - - if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) { - fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno)); - exit (1); - } - strcpy( sz_Last_User , user ) ; - }*/ - if (fprintf(ufile->file, 
"%s\t%s\t%s\t%s\t%ld\t%s\t%ld\t%s\n",dia,hora,ip,url,nbytes,code,elap_time,smartfilter)<=0) { - debuga(_("Write error in the log file of user %s\n"),user); - exit(EXIT_FAILURE); - } - - if(fp_log && ilf!=ILF_Sarg) - fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%ld\t%s\t%ld\t%s\n",dia,hora,user,ip,url,nbytes,code,elap_time,smartfilter); - - totregsg++; - - if(!dataonly && download_flag && strstr(code,"DENIED") == 0) { - ndownload = 1; - - if ( ! fp_Download_Unsort ) { - if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) { - debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno)); - exit (1); - } - } - fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url); - } - - if((ReportType & REPORT_TYPE_DENIED) != 0) { - if(fp_denied && strstr(code,"DENIED/403") != 0) { - fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,full_url); - denied_count++; - } - } - if((ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) { - if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) { - fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,full_url); - authfail_count++; - } - } - - if (ilf!=ILF_Sarg) { - if(!totper || idatamaxdate) { - maxdate=idata; - memcpy(&period.end,t,sizeof(*t)); - } - totper=true; - } - - if(debugm){ - printf("IP=\t%s\n",ip); - printf("USER=\t%s\n",user); - printf("ELAP=\t%ld\n",elap_time); - printf("DATE=\t%s\n",dia); - printf("TIME=\t%s\n",hora); - printf("FUNC=\t%s\n",fun); - printf("URL=\t%s\n",url); - printf("CODE=\t%s\n",code); - printf("LEN=\t%ld\n",nbytes); - } - } - if (!from_stdin) { - if (from_pipe) - pclose(fp_in); - else { - fclose(fp_in); - if( ShowReadStatistics ) - printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 ); - } - } - } - - if (debug) - debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx); - - longline_destroy(&line); - if ( fp_Download_Unsort ) - fclose 
(fp_Download_Unsort); - - for (ufile=first_user_file ; ufile ; ufile=ufile1) { - ufile1=ufile->next; - if (ufile->file!=NULL) fclose(ufile->file); - free(ufile); - } + LogStatus=ReadLogFile(&ReadFilter); free_download(); free_excludecodes(); free_exclude(); - if(debug) { - int totalcount=0; - - for (ilf=0 ; ilf0 && ilf_count[ILF_Squid]>0) - debuga(_("Log with mixed records format (squid and common log)\n")); - - if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0) - debuga(_("Common log format\n")); - - if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0) - debuga(_("Squid log format\n")); - - if(ilf_count[ILF_Sarg]>0) - debuga(_("Sarg log format\n")); - - if(totalcount==0 && totregsg) - debuga(_("Log with invalid format\n")); - } - - if(!totregsg){ + if (!LogStatus){ debuga(_("No records found\n")); debuga(_("End\n")); - if(fp_denied) fclose(fp_denied); - if(fp_authfail) fclose(fp_authfail); userinfo_free(); if(userfile) free(userfile); close_usertab(); exit(EXIT_SUCCESS); } - if (date[0]!='\0') { + if (ReadFilter.DateRange[0]!='\0') { char date0[30], date1[30]; strftime(date0,sizeof(date0),"%d/%m/%Y",&period.start); @@ -1627,75 +694,9 @@ int main(int argc,char *argv[]) exit(EXIT_FAILURE); } - if(debugz){ - debugaz(_("date=%s\n"),dia); - debugaz(_("period=%s\n"),period.text); - } - if(debug) debuga(_("Period: %s\n"),period.text); - if(fp_denied) - fclose(fp_denied); - if(fp_authfail) - fclose(fp_authfail); - - if(fp_log != NULL) { - char end_hour[128]; - char val2[40]; - char val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below - - fclose(fp_log); - safe_strcpy(end_hour,tbuf2,sizeof(end_hour)); - strftime(val2,sizeof(val2),"%d%m%Y",&period.start); - strftime(val1,sizeof(val1),"%d%m%Y",&period.end); - if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) { - debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour); - 
exit(EXIT_FAILURE); - } - if (rename(arq_log,val4)) { - debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno)); - } else { - strcpy(arq_log,val4); - - if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') { - /* - No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are - necessary around the command name, put them in the configuration file. - */ - if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) { - debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log); - exit(EXIT_FAILURE); - } - cstatus=system(val1); - if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { - debuga(_("command return status %d\n"),WEXITSTATUS(cstatus)); - debuga(_("command: %s\n"),val1); - exit(EXIT_FAILURE); - } - } - } - if(debug) - debuga(_("Sarg parsed log saved as %s\n"),arq_log); - } - - if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) { - if (snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort)>=sizeof(csort)) { - debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),denied_unsort,denied_sort); - exit(EXIT_FAILURE); - } - cstatus=system(csort); - if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { - debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus)); - debuga(_("sort command: %s\n"),csort); - exit(EXIT_FAILURE); - } - if (unlink(denied_unsort)) { - debuga(_("Cannot delete %s - %s\n"),denied_unsort,strerror(errno)); - exit(EXIT_FAILURE); - } - } - if(DataFile[0] != '\0') data_file(tmp); else diff --git a/readlog.c b/readlog.c new file mode 100644 index 0000000..0253f98 --- /dev/null +++ b/readlog.c @@ -0,0 +1,1051 @@ +/* + * SARG Squid Analysis Report Generator http://sarg.sourceforge.net + * 1998, 2012 + * + * SARG donations: + * please look at http://sarg.sourceforge.net/donations.php + * Support: + * 
http://sourceforge.net/projects/sarg/forums/forum/363374 + * --------------------------------------------------------------------- + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "include/conf.h" +#include "include/defs.h" +#include "include/readlog.h" + +#define REPORT_EVERY_X_LINES 5000 +#define MAX_OPEN_USER_FILES 10 + +struct userfilestruct +{ + struct userfilestruct *next; + struct userinfostruct *user; + FILE *file; +}; + +numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; +numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; + +extern char denied_sort[MAXLEN]; +extern char *userfile; + +/*! +Read the log files. + +\param Filter The filtering parameters for the file to load. + +\retval 1 Records found. +\retval 0 No record found. +*/ +int ReadLogFile(struct ReadLogDataStruct *Filter) +{ + enum isa_col_id { + ISACOL_Ip, + ISACOL_UserName, + ISACOL_Date, + ISACOL_Time, + ISACOL_TimeTaken, + ISACOL_Bytes, + ISACOL_Uri, + ISACOL_Status, + ISACOL_Last //last entry of the list ! + }; + enum InputLogFormat { + ILF_Unknown, + ILF_Squid, + ILF_Common, + ILF_Sarg, + ILF_Isa, + ILF_Last //last entry of the list ! 
+ }; + + enum InputLogFormat ilf; + int ilf_count[ILF_Last]; + longline line; + char *linebuf; + char *str; + char *full_url; + char arq_log[255]; + char fun[MAXLEN]; + char elap[255]; + char user[MAX_USER_LEN]; + char data[255]; + char ip[MAXLEN]; + char tam[255]; + char hora[30]; + char mes[30]; + char tbuf2[128]; + char dia[128]; + char wuser[MAXLEN]; + char tmp3[MAXLEN]; + char sz_Download_Unsort[20000]; + char start_hour[128]; + char denied_unsort[MAXLEN]; + char authfail_unsort[MAXLEN]; + char csort[MAXLEN]; + char download_url[MAXLEN]; + char smartfilter[MAXLEN]; + const char *arq; + const char *url; + int iarq; + int blen; + int OutputNonZero = REPORT_EVERY_X_LINES ; + int idata=0; + int isa_ncols=0,isa_cols[ISACOL_Last]; + int x; + int hmr; + int nopen; + int maxopenfiles=MAX_OPEN_USER_FILES; + int mindate=0; + int maxdate=0; + int cstatus; + unsigned long int recs1=0UL; + unsigned long int recs2=0UL; + long int totregsl=0; + long int totregsg=0; + long int totregsx=0; + long int nbytes; + long int elap_time; + long long int iyear, imonth, iday; + FILE *fp_in=NULL; + FILE *fp_log=NULL; + FILE *fp_denied=NULL; + FILE *fp_authfail=NULL; + FILE *fp_Download_Unsort=NULL; + bool from_pipe; + bool from_stdin; + bool download_flag=false; + bool id_is_ip; + bool totper=false; + struct stat logstat; + struct getwordstruct gwarea; + struct tm *t; + struct tm tt; + struct userfilestruct *prev_ufile; + struct userinfostruct *uinfo; + struct userfilestruct *first_user_file=NULL; + struct userfilestruct *ufile; + struct userfilestruct *ufile1; + time_t tnum; + + for (ilf=0 ; ilfDateRange[0]!='\0') { + if (stat(arq,&logstat)!=0) { + debuga(_("Cannot get the modification time of input log file %s (%s). 
Processing it anyway\n"),arq,strerror(errno)); + } else { + struct tm *logtime=localtime(&logstat.st_mtime); + if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday0) { + for (i=0 ; i=sizeof(arq_log)) { + debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); + exit(EXIT_FAILURE); + } + if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { + debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno)); + exit(EXIT_FAILURE); + } + fputs("*** SARG Log ***\n",fp_log); + } + + recs2++; + if( ShowReadStatistics && !from_stdin && !from_pipe && --OutputNonZero<=0) { + double perc = recs2 * 100. / recs1 ; + printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc); + putchar('\r'); + fflush (stdout); + OutputNonZero = REPORT_EVERY_X_LINES ; + } + if(blen < 58) continue; + if(strstr(linebuf,"HTTP/0.0") != 0) continue; + if(strstr(linebuf,"logfile turned over") != 0) continue; + if(linebuf[0] == ' ') continue; + + // exclude_string + if(ExcludeString[0] != '\0') { + bool exstring=false; + getword_start(&gwarea,ExcludeString); + while(strchr(gwarea.current,':') != 0) { + if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) { + debuga(_("Maybe you have a broken record or garbage in your exclusion string\n")); + exit(EXIT_FAILURE); + } + if((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) { + exstring=true; + break; + } + } + if(!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL ) + exstring=true; + if(exstring) continue; + } + + totregsl++; + if(debugm) + printf("BUF=%s\n",linebuf); + + t=NULL; + if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) { + getword_start(&gwarea,linebuf); + if (getword(data,sizeof(data),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken time in your access.log file\n")); + exit(EXIT_FAILURE); + } + if((str=(char *) strchr(data, '.')) != (char *) NULL && (str=(char *) strchr(str+1, '.')) != (char *) NULL ) { + strcpy(ip,data); + strcpy(elap,"0"); + 
if(squid24) { + if (getword(user,sizeof(user),&gwarea,' ')<0 || getword_skip(255,&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + } else { + if (getword_skip(255,&gwarea,' ')<0 || getword(user,sizeof(user),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + } + if (getword(data,sizeof(data),&gwarea,']')<0 || getword_skip(MAXLEN,&gwarea,'"')<0 || + getword(fun,sizeof(fun),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0) { + debuga(_("Maybe you have a broken url in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_skip(MAXLEN,&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(code2,sizeof(code2),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(tam,sizeof(tam),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if((str=(char *) strchr(gwarea.current, ' ')) != (char *) NULL ) { + if (getword(code,sizeof(code),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + } else { + if (getword(code,sizeof(code),&gwarea,'\0')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + } + + if ((str = strchr(code, ':')) != NULL) + *str = '/'; + + if(strcmp(tam,"\0") == 0) + strcpy(tam,"0"); + + ilf=ILF_Common; + ilf_count[ilf]++; + + getword_start(&gwarea,data+1); + if (getword_multisep(data,sizeof(data),&gwarea,':')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if 
(getword_multisep(hora,sizeof(hora),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + getword_start(&gwarea,data); + if (getword_atoll(&iday,&gwarea,'/')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(mes,sizeof(mes),&gwarea,'/')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_atoll(&iyear,&gwarea,'/')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + + imonth=month2num(mes)+1; + idata=builddia(iday,imonth,iyear); + computedate(iyear,imonth,iday,&tt); + if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || + tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { + debuga(_("Invalid time found in %s\n"),arq); + exit(EXIT_FAILURE); + } + t=&tt; + } + + if(ilf==ILF_Unknown || ilf==ILF_Squid) { + if (getword(elap,sizeof(elap),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + while(strcmp(elap,"") == 0 && gwarea.current[0] != '\0') + if (getword(elap,sizeof(elap),&gwarea,' ')<0) { + debuga(_("Maybe you have a broken elapsed time in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if(strlen(elap) < 1) continue; + if (getword(ip,sizeof(ip),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken client IP address in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(code,sizeof(code),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken result code in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(tam,sizeof(tam),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken amount of data in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(fun,sizeof(fun),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken request method in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if 
(getword_ptr(linebuf,&full_url,&gwarea,' ')<0){ + debuga(_("Maybe you have a broken url in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(user,sizeof(user),&gwarea,' ')<0){ + debuga(_("Maybe you have a broken user ID in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + ilf=ILF_Squid; + ilf_count[ilf]++; + + tnum=atoi(data); + t=localtime(&tnum); + if (t == NULL) { + debuga(_("Cannot convert the timestamp from the squid log file\n")); + exit(EXIT_FAILURE); + } + + strftime(tbuf2, sizeof(tbuf2), "%H%M", t); + + idata=(t->tm_year+1900)*10000+(t->tm_mon+1)*100+t->tm_mday; + } + } + if (ilf==ILF_Sarg) { + getword_start(&gwarea,linebuf); + if (getword(data,sizeof(data),&gwarea,'\t')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(hora,sizeof(hora),&gwarea,'\t')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(user,sizeof(user),&gwarea,'\t')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(ip,sizeof(ip),&gwarea,'\t')<0) { + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_ptr(linebuf,&full_url,&gwarea,'\t')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(tam,sizeof(tam),&gwarea,'\t')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(code,sizeof(code),&gwarea,'\t')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(elap,sizeof(elap),&gwarea,'\t')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword(smartfilter,sizeof(smartfilter),&gwarea,'\0')<0){ + debuga(_("Maybe you have a 
broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + getword_start(&gwarea,data); + if (getword_atoll(&iday,&gwarea,'/')<0 || iday<1 || iday>31){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_atoll(&imonth,&gwarea,'/')<0 || imonth<1 || imonth>12){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_atoll(&iyear,&gwarea,'\0')<0){ + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + idata=builddia(iday,imonth,iyear); + computedate(iyear,imonth,iday,&tt); + if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || + tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { + debuga(_("Invalid time found in %s\n"),arq); + exit(EXIT_FAILURE); + } + t=&tt; + } + if (ilf==ILF_Isa) { + if (linebuf[0] == '#') { + int ncols,cols[ISACOL_Last]; + + fixendofline(linebuf); + getword_start(&gwarea,linebuf); + // remove the #Fields: column at the beginning of the line + if (getword_skip(1000,&gwarea,' ')<0){ + debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + for (ncols=0 ; ncols=0) { + isa_ncols=ncols; + for (ncols=0 ; ncols=sizeof(ip)) { + debuga(_("Maybe you have a broken IP in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(ip,str); + } else if (x==isa_cols[ISACOL_UserName]) { + if (strlen(str)>=sizeof(user)) { + debuga(_("Maybe you have a broken user ID in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(user,str); + } else if (x==isa_cols[ISACOL_Date]) { + if (strlen(str)>=sizeof(data)) { + debuga(_("Maybe you have a broken date in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(data,str); + } else if (x==isa_cols[ISACOL_Time]) { + if (strlen(str)>=sizeof(hora)) { + debuga(_("Maybe you have a broken time in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + 
strcpy(hora,str); + } else if (x==isa_cols[ISACOL_TimeTaken]) { + if (strlen(str)>=sizeof(elap)) { + debuga(_("Maybe you have a broken download duration in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(elap,str); + } else if (x==isa_cols[ISACOL_Bytes]) { + if (strlen(str)>=sizeof(tam)) { + debuga(_("Maybe you have a broken download size in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(tam,str); + } else if (x==isa_cols[ISACOL_Uri]) { + full_url=str; + } else if (x==isa_cols[ISACOL_Status]) { + if (strlen(str)>=sizeof(code)) { + debuga(_("Maybe you have a broken access code in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + strcpy(code,str); + } + } + + if(strcmp(code,"401") == 0 || strcmp(code,"403") == 0 || strcmp(code,"407") == 0) { + sprintf(val1,"DENIED/%s",code); + strcpy(code,val1); + } + getword_start(&gwarea,data); + if (getword_atoll(&iyear,&gwarea,'-')<0){ + debuga(_("Maybe you have a broken year in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_atoll(&imonth,&gwarea,'-')<0){ + debuga(_("Maybe you have a broken month in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + if (getword_atoll(&iday,&gwarea,'\0')<0){ + debuga(_("Maybe you have a broken day in your %s file\n"),arq); + exit(EXIT_FAILURE); + } + + idata=builddia(iday,imonth,iyear); + computedate(iyear,imonth,iday,&tt); + if (isa_cols[ISACOL_Time]>=0) { + if (sscanf(hora,"%d:%d:%d",&tt.tm_hour,&tt.tm_min,&tt.tm_sec)!=3 || tt.tm_hour<0 || tt.tm_hour>=24 || + tt.tm_min<0 || tt.tm_min>=60 || tt.tm_sec<0 || tt.tm_sec>=60) { + debuga(_("Invalid time found in %s\n"),arq); + exit(EXIT_FAILURE); + } + } + t=&tt; + } + if (t==NULL) { + debuga(_("Unknown input log file format\n")); + break; + } + + strftime(dia, sizeof(dia), "%d/%m/%Y", t); + snprintf(hora,sizeof(hora),"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec); + + if(debugm) + printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,dfrom,duntil); + + if(Filter->DateRange[0] != '\0'){ + 
if(idata < dfrom || idata > duntil) continue; + } + + // Record only hours usage which is required + if (t) { + if( bsearch( &( t -> tm_wday ), weekdays.list, weekdays.len, sizeof( int ), compar ) == NULL ) + continue; + + if( bsearch( &( t -> tm_hour ), hours.list, hours.len, sizeof( int ), compar ) == NULL ) + continue; + } + + + if(strlen(user) > MAX_USER_LEN) { + if (debugm) printf(_("User ID too long: %s\n"),user); + totregsx++; + continue; + } + + // include_users + if(IncludeUsers[0] != '\0') { + snprintf(val1,sizeof(val1),":%s:",user); + if((str=(char *) strstr(IncludeUsers,val1)) == (char *) NULL ) + continue; + } + + if(vercode(code)) { + if (debugm) printf(_("Excluded code: %s\n"),code); + totregsx++; + continue; + } + + if(testvaliduserchar(user)) + continue; + + #if 0 + if((str = strstr(user,"%20")) != NULL) { + /* + This is a patch introduced to solve bug #1624251 reported at sourceforge but + the side effect is to truncate the name at the first space and merge the reports + of people whose name is identical up to the first space. + + The old code used to truncate the user name at the first % if a %20 was + found anywhere in the string. That means the string could be truncated + at the wrong place if another % occured before the %20. This new code should + avoid that problem and only truncate at the space. There is no bug + report indicating that anybody noticed this. + */ + *str='\0'; + } + + /* + Code prior to 2.2.7 used to replace any %xx by a dot as long as a %5c was + found in the user name. + */ + while((str = strstr(user,"%5c")) != NULL) { + *str='.'; + for (x=3 ; str[x] ; x++) str[x-2]=str[x]; + } + #endif + // replace any tab by a single space + for (str=full_url ; *str ; str++) + if (*str=='\t') *str=' '; + for (str=code ; *str ; str++) + if (*str=='\t') *str=' '; + + if(ilf!=ILF_Sarg) { + /* + The full URL is not saved in sarg log. There is no point in testing the URL to detect + a downloaded file. 
+ */ + download_flag=is_download_suffix(full_url); + if (download_flag) { + safe_strcpy(download_url,full_url,sizeof(download_url)); + download_count++; + } + } else + download_flag=false; + + url=process_url(full_url,LongUrl); + if (!url || url[0] == '\0') continue; + + if(addr[0] != '\0'){ + if(strcmp(addr,ip)!=0) continue; + } + if(Filter->HostFilter) { + if(!vhexclude(url)) { + if (debugm) printf(_("Excluded site: %s\n"),url); + totregsx++; + continue; + } + } + + if(Filter->StartTime >= 0 && Filter->EndTime >= 0) { + hmr=t->tm_hour*100+t->tm_min; + if(hmr < Filter->StartTime || hmr > Filter->EndTime) continue; + } + + if(site[0] != '\0'){ + if(strstr(url,site)==0) continue; + } + + if(UserIp) { + strcpy(user,ip); + id_is_ip=true; + } else { + id_is_ip=false; + if(strcmp(user,"-") == 0 || strcmp(user," ") == 0 || strcmp(user,"") == 0) { + if(RecordsWithoutUser == RECORDWITHOUTUSER_IP) { + strcpy(user,ip); + id_is_ip=true; + } + if(RecordsWithoutUser == RECORDWITHOUTUSER_IGNORE) + continue; + if(RecordsWithoutUser == RECORDWITHOUTUSER_EVERYBODY) + strcpy(user,"everybody"); + } else { + strlow(user); + if(NtlmUserFormat == NTLMUSERFORMAT_USER) { + if((str = strchr(user,'_')) != 0) { + strcpy(warea,str+1); + strcpy(user,warea); + } + if((str = strchr(user,'+')) != 0) { + strcpy(warea,str+1); + strcpy(user,warea); + } + } + } + } + + if(us[0] != '\0'){ + if(strcmp(user,us)!=0) continue; + } + + if(Filter->SysUsers) { + snprintf(wuser,sizeof(wuser),":%s:",user); + if(strstr(userfile, wuser) == 0) + continue; + } + + if(Filter->UserFilter) { + if(!vuexclude(user)) { + if (debugm) printf(_("Excluded user: %s\n"),user); + totregsx++; + continue; + } + } + + if(strcmp(user,"-") ==0 || strcmp(user," ") ==0 || strcmp(user,"") ==0 || strcmp(user,":") ==0) + continue; + + nbytes=atol(tam); + if (nbytes<0) nbytes=0; + + elap_time=atol(elap); + if (elap_time<0) elap_time=0; + if (Filter->max_elapsed>0 && elap_time>Filter->max_elapsed) { + elap_time=0; + } + + if((str=(char *) 
strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) { + fixendofline(str); + snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1); + } else strcpy(smartfilter,"\"\""); + + nopen=0; + prev_ufile=NULL; + for (ufile=first_user_file ; ufile && strcmp(user,ufile->user->id)!=0 ; ufile=ufile->next) { + prev_ufile=ufile; + if (ufile->file) nopen++; + } + if (!ufile) { + ufile=malloc(sizeof(*ufile)); + if (!ufile) { + debuga(_("Not enough memory to store the user %s\n"),user); + exit(EXIT_FAILURE); + } + memset(ufile,0,sizeof(*ufile)); + ufile->next=first_user_file; + first_user_file=ufile; + uinfo=userinfo_create(user); + ufile->user=uinfo; + uinfo->id_is_ip=id_is_ip; + } else { + if (prev_ufile) { + prev_ufile->next=ufile->next; + ufile->next=first_user_file; + first_user_file=ufile; + } + } + #ifdef ENABLE_DOUBLE_CHECK_DATA + ufile->user->nbytes+=nbytes; + ufile->user->elap+=elap_time; + #endif + + if (ufile->file==NULL) { + if (nopen>=maxopenfiles) { + x=0; + for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) { + if (ufile1->file!=NULL) { + if (x>=maxopenfiles) { + if (fclose(ufile1->file)==EOF) { + debuga(_("Failed to close the log file of user %s - %s\n"),ufile1->user->id,strerror(errno)); + exit(EXIT_FAILURE); + } + ufile1->file=NULL; + } + x++; + } + } + } + if (snprintf (tmp3, sizeof(tmp3), "%s/%s.unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) { + debuga(_("Temporary user file name too long: %s/%s.unsort\n"), tmp, ufile->user->filename); + exit(EXIT_FAILURE); + } + if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) { + debuga(_("(log) Cannot open temporary file: %s - %s\n"), tmp3, strerror(errno)); + exit (1); + } + } + + /*if ( strcmp ( user , sz_Last_User ) != 0 ) { + if ( fp_Write_User ) + fclose( fp_Write_User ) ; + sprintf (tmp3, "%s/%s.unsort", tmp, user); + + if ((fp_Write_User = MY_FOPEN (tmp3, "a")) == NULL) { + fprintf (stderr, "%s: (log) %s: %s - %s\n", argv[0], _("Cannot open temporary file"), tmp3, strerror(errno)); + exit (1); + } + 
strcpy( sz_Last_User , user ) ; + }*/ + if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%ld\t%s\t%ld\t%s\n",dia,hora,ip,url,nbytes,code,elap_time,smartfilter)<=0) { + debuga(_("Write error in the log file of user %s\n"),user); + exit(EXIT_FAILURE); + } + + if(fp_log && ilf!=ILF_Sarg) + fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%ld\t%s\t%ld\t%s\n",dia,hora,user,ip,url,nbytes,code,elap_time,smartfilter); + + totregsg++; + + if(!dataonly && download_flag && strstr(code,"DENIED") == 0) { + ndownload = 1; + + if ( ! fp_Download_Unsort ) { + if ((fp_Download_Unsort = MY_FOPEN ( sz_Download_Unsort, "a")) == NULL) { + debuga(_("(log) Cannot open temporary file: %s - %s\n"),sz_Download_Unsort, strerror(errno)); + exit (1); + } + } + fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,download_url); + } + + if((ReportType & REPORT_TYPE_DENIED) != 0) { + if(fp_denied && strstr(code,"DENIED/403") != 0) { + fprintf(fp_denied, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,full_url); + denied_count++; + } + } + if((ReportType & REPORT_TYPE_AUTH_FAILURES) != 0) { + if(fp_authfail && (strstr(code,"DENIED/401") != 0 || strstr(code,"DENIED/407") != 0)) { + fprintf(fp_authfail, "%s\t%s\t%s\t%s\t%s\n",dia,hora,user,ip,full_url); + authfail_count++; + } + } + + if (ilf!=ILF_Sarg) { + if(!totper || idatamaxdate) { + maxdate=idata; + memcpy(&period.end,t,sizeof(*t)); + } + totper=true; + } + + if(debugm){ + printf("IP=\t%s\n",ip); + printf("USER=\t%s\n",user); + printf("ELAP=\t%ld\n",elap_time); + printf("DATE=\t%s\n",dia); + printf("TIME=\t%s\n",hora); + printf("FUNC=\t%s\n",fun); + printf("URL=\t%s\n",url); + printf("CODE=\t%s\n",code); + printf("LEN=\t%ld\n",nbytes); + } + } + + if (!from_stdin) { + if (from_pipe) + pclose(fp_in); + else { + fclose(fp_in); + if( ShowReadStatistics ) + printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs1, (float) 100 ); + } + } + } + longline_destroy(&line); + + if(fp_log != NULL) { + char end_hour[128]; + char val2[40]; + char 
val4[255];//val4 must not be bigger than arq_log without fixing the strcpy below + + fclose(fp_log); + safe_strcpy(end_hour,tbuf2,sizeof(end_hour)); + strftime(val2,sizeof(val2),"%d%m%Y",&period.start); + strftime(val1,sizeof(val1),"%d%m%Y",&period.end); + if (snprintf(val4,sizeof(val4),"%s/sarg-%s_%s-%s_%s.log",ParsedOutputLog,val2,start_hour,val1,end_hour)>=sizeof(val4)) { + debuga(_("File name too long: %s/sarg-%s_%s-%s_%s.log\n"),ParsedOutputLog,val2,start_hour,val1,end_hour); + exit(EXIT_FAILURE); + } + if (rename(arq_log,val4)) { + debuga(_("failed to rename %s to %s - %s\n"),arq_log,val4,strerror(errno)); + } else { + strcpy(arq_log,val4); + + if(strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') { + /* + No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are + necessary around the command name, put them in the configuration file. + */ + if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,arq_log)>=sizeof(val1)) { + debuga(_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,arq_log); + exit(EXIT_FAILURE); + } + cstatus=system(val1); + if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { + debuga(_("command return status %d\n"),WEXITSTATUS(cstatus)); + debuga(_("command: %s\n"),val1); + exit(EXIT_FAILURE); + } + } + } + if(debug) + debuga(_("Sarg parsed log saved as %s\n"),arq_log); + } + + if (fp_denied) fclose(fp_denied); + if (fp_authfail) fclose(fp_authfail); + if (fp_Download_Unsort) fclose (fp_Download_Unsort); + + for (ufile=first_user_file ; ufile ; ufile=ufile1) { + ufile1=ufile->next; + if (ufile->file!=NULL) fclose(ufile->file); + free(ufile); + } + + if (debug) { + int totalcount=0; + + debuga(_(" Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx); + + for (ilf=0 ; ilf0 && ilf_count[ILF_Squid]>0) + debuga(_("Log with mixed records format (squid and common log)\n")); + + if(ilf_count[ILF_Common]>0 && 
ilf_count[ILF_Squid]==0) + debuga(_("Common log format\n")); + + if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0) + debuga(_("Squid log format\n")); + + if(ilf_count[ILF_Sarg]>0) + debuga(_("Sarg log format\n")); + + if(totalcount==0 && totregsg) + debuga(_("Log with invalid format\n")); + } + + if(debugz){ + debugaz(_("date=%s\n"),dia); + debugaz(_("period=%s\n"),period.text); + } + + if(DataFile[0] == '\0' && (ReportType & REPORT_TYPE_DENIED) != 0) { + if (snprintf(csort,sizeof(csort),"sort -T \"%s\" -t \"\t\" -k 3,3 -k 5,5 -o \"%s\" \"%s\"",tmp,denied_sort,denied_unsort)>=sizeof(csort)) { + debuga(_("Sort command too long when sorting file \"%s\" to \"%s\"\n"),denied_unsort,denied_sort); + exit(EXIT_FAILURE); + } + cstatus=system(csort); + if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) { + debuga(_("sort command return status %d\n"),WEXITSTATUS(cstatus)); + debuga(_("sort command: %s\n"),csort); + exit(EXIT_FAILURE); + } + if (unlink(denied_unsort)) { + debuga(_("Cannot delete %s - %s\n"),denied_unsort,strerror(errno)); + exit(EXIT_FAILURE); + } + } + + return((totregsg!=0) ? 1 : 0); +} diff --git a/report.c b/report.c index 4ede8ab..97d962d 100644 --- a/report.c +++ b/report.c @@ -29,6 +29,8 @@ //! The global statistics of the whole log read. struct globalstatstruct globstat; +//! \c True to enable the smart filter. +bool smartfilter=false; static FILE *fp_tt=NULL; @@ -81,7 +83,7 @@ void gerarel(void) DayObject daystat; ipantes[0]='\0'; - smartfilter=0; + smartfilter=false; memset(&globstat,0,sizeof(globstat)); if (vrfydir(&period, addr, site, us, email)<0) { @@ -177,7 +179,7 @@ void gerarel(void) } if(accsmart[0] != '\0') { - smartfilter++; + smartfilter=true; grava_SmartFilter(outdirname,uinfo->id,accip,accdia,acchora,accurl,accsmart); } diff --git a/topuser.c b/topuser.c index eb7c5bd..67c3df1 100644 --- a/topuser.c +++ b/topuser.c @@ -28,6 +28,7 @@ #include "include/defs.h" extern struct globalstatstruct globstat; +extern bool smartfilter; /*! 
Save the total number of users. The number is written in sarg-users and set