From: Frédéric Marchal Date: Thu, 3 Dec 2009 21:13:59 +0000 (+0000) Subject: Input log file type detection partly rewritten to clearly distinguish which type... X-Git-Tag: v2_2_7~64 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=58d7d8fea64697ecbb5696362e27f855631f2995;p=thirdparty%2Fsarg.git Input log file type detection partly rewritten to clearly distinguish which type is processed where. Read the input log file from standard input if log file name is -. --- diff --git a/ChangeLog b/ChangeLog index 431968c..dc0770d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,12 @@ SARG ChangeLog Dec-03-2009 Version 2.2.7 - - extra compile and run time protection (FORTIFY_SOURCE) fixed in configure. - - invalid sort field separator in useragent.c + - Extra compile and run time protection (FORTIFY_SOURCE) fixed in configure. + - Invalid sort field separator in useragent.c Thanks to Maxim Britov + - Use tabulations as columns separator in intermediary files to avoid problems when a field of the log contains a space. + - Input log file type detection partly rewritten to clearly distinguish which type is processed where. + - Read the input log file from standard input if log file name is -. Oct-14-2009 Version 2.2.6 - Protection against buffer overflows in getword and friends and report the origin of the error instead of always blaming access.log. diff --git a/log.c b/log.c index 3a5c111..0d15eda 100644 --- a/log.c +++ b/log.c @@ -56,6 +56,14 @@ int main(int argc,char *argv[]) ISACOL_Status, ISACOL_Last //last entry of the list ! }; + enum InputLogFormat { + ILF_Unknown, + ILF_Squid, + ILF_Common, + ILF_Sarg, + ILF_Isa, + ILF_Last //last entry of the list ! + }; FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL; @@ -100,9 +108,8 @@ int main(int argc,char *argv[]) char *str; char bufz[MAXLEN]; char bufy[MAXLEN]; - int common; - int common_log=0; - int squid_log=0; + enum InputLogFormat ilf; + int ilf_count[ILF_Last]; int ch; int x, l; int errflg=0; @@ -116,11 +123,12 @@ int main(int argc,char *argv[]) int iarq=0; int exstring=0; int isa_ncols=0,isa_cols[ISACOL_Last]; + int from_stdin; long totregsl=0; long totregsg=0; long totregsx=0; long totper=0; - long int max_elapsed=0; + long int max_elapsed=0; time_t tt; struct tm *t; off_t recs1=0; @@ -150,6 +158,7 @@ int main(int argc,char *argv[]) SquidGuardLogFormat[0]='\0'; SquidGuardLogAlternate[0]='\0'; arq[0]='\0'; + for (ilf=0 ; ilf0) fprintf(stderr,"SARG: %s\n",text[142]); - fclose(fp_in); - continue; - } - fprintf(stderr,"SARG: read error in %s\n",arq); - exit(1); - } - if(!isalog && strncmp(bufz,"#Software: Mic",14) == 0) { - fixendofline(bufz); - debuga("%s: %s",text[143],bufz); - isalog++; - } - - if(strncmp(bufz,"*** SARG Log ***",16) == 0) { - if (getword(val2,sizeof(val2),arqtt,'-')<0 || getword(val2,sizeof(val2),arqtt,'_')<0 || - getword(val3,sizeof(val3),arqtt,'-')<0 || getword(val3,sizeof(val3),arqtt,'_')<0) { - printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); - exit(1); - } - sprintf(period,"%s-%s",val2,val3); - sarglog=1; - } else fseek(fp_in, 0, SEEK_SET); - - if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog) { - if(access(ParsedOutputLog,R_OK) != 0) { - sprintf(csort,"%s",ParsedOutputLog); - my_mkdir(csort); - } - sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog); - if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { - fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq_log,strerror(errno)); + if(strcmp(arq,"-")==0) { + if(debug) + debuga("%s: %s",text[7],"stdin"); + fp_in=stdin; + from_stdin=1; + } else { + decomp(arq,zip,tmp); + if(debug) + debuga("%s: %s",text[7],arq); + if((fp_in=MY_FOPEN(arq,"r"))==NULL) { + fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq,strerror(errno)); exit(1); } - fputs("*** SARG Log ***\n",fp_log); + from_stdin=0; } - + ilf=ILF_Unknown; // pre-Read the file only if I have to show stats - if(bool_ShowReadStatistics) { + if(bool_ShowReadStatistics && !from_stdin) { rewind(fp_in); recs1=0; recs2=0; @@ -777,8 +754,44 @@ int main(int argc,char *argv[]) } while(fgets(bufz,sizeof(bufz),fp_in)!=NULL) { + + if (ilf==ILF_Unknown) { + if(strncmp(bufz,"#Software: Mic",14) == 0) { + fixendofline(bufz); + debuga("%s: %s",text[143],bufz); + ilf=ILF_Isa; + ilf_count[ilf]++; + continue; + } + + if(strncmp(bufz,"*** SARG Log ***",16) == 0) { + if (getword(val2,sizeof(val2),arqtt,'-')<0 || getword(val2,sizeof(val2),arqtt,'_')<0 || + getword(val3,sizeof(val3),arqtt,'-')<0 || getword(val3,sizeof(val3),arqtt,'_')<0) { + printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); + exit(1); + } + sprintf(period,"%s-%s",val2,val3); + ilf=ILF_Sarg; + ilf_count[ilf]++; + continue; + } + } + + if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) { + if(access(ParsedOutputLog,R_OK) != 0) { + sprintf(csort,"%s",ParsedOutputLog); + my_mkdir(csort); + } + sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog); + if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { + fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq_log,strerror(errno)); + exit(1); + } + fputs("*** SARG Log ***\n",fp_log); + } + recs2++; - if( bool_ShowReadStatistics && ! --OutputNonZero) { + if( bool_ShowReadStatistics && !from_stdin && ! --OutputNonZero) { perc = recs2 * 100 ; perc = perc / recs1 ; printf("SARG: Records in file: " OFFSET_STRING", reading: %3.2f%%\r",recs1,perc); @@ -827,11 +840,10 @@ int main(int argc,char *argv[]) *str = '\0'; /* strip \n */ totregsl++; - common=0; if(debugm) printf("BUF=%s\n",bufz); - if(!sarglog && !isalog) { + if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) { if (getword(data,sizeof(data),bufz,' ')<0) { printf("SARG: Maybe you have a broken record or garbage in your access.log file.\n"); exit(1); @@ -872,12 +884,12 @@ int main(int argc,char *argv[]) if(strcmp(tam,"\0") == 0) strcpy(tam,"0"); - common++; - common_log=1; + ilf=ILF_Common; + ilf_count[ilf]++; } } - if(!common) { + if(ilf==ILF_Unknown || ilf==ILF_Squid) { if (getword(elap,sizeof(elap),bufz,' ')<0) { printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); exit(1); @@ -916,9 +928,11 @@ int main(int argc,char *argv[]) printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); exit(1); } - squid_log=1; + ilf=ILF_Squid; + ilf_count[ilf]++; } - } else if(!isalog) { + } + if (ilf==ILF_Sarg) { if (getword(data,sizeof(data),bufz,' ')<0){ printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); exit(1); @@ -955,7 +969,8 @@ int main(int argc,char *argv[]) printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq); exit(1); } - } else if(isalog) { + } + if (ilf==ILF_Isa) { if (bufz[0] == '#') { int ncols,cols[ISACOL_Last]; @@ -1165,60 +1180,58 @@ int main(int argc,char *argv[]) } } - if(!sarglog) { - if(!common && !isalog) { - tt=atoi(data); - t=localtime(&tt); - - strftime(tbuf2, sizeof(tbuf2), "%H%M", t); - if(strncmp(df,"u",1) == 0) - strftime(tbuf, sizeof(tbuf), "%Y%b%d", t); - if(strncmp(df,"e",1) == 0) - strftime(tbuf, sizeof(tbuf), "%d%b%Y", t); - if(strncmp(df,"w",1) == 0) { - strcpy(IndexTree,"file"); - strftime(tbuf, sizeof(tbuf), "%Y.%U", t); - } - - strftime(wdata, sizeof(wdata), "%Y%m%d", t); - idata=atoi(wdata); + if(ilf==ILF_Squid) { + tt=atoi(data); + t=localtime(&tt); + + strftime(tbuf2, sizeof(tbuf2), "%H%M", t); + if(strncmp(df,"u",1) == 0) + strftime(tbuf, sizeof(tbuf), "%Y%b%d", t); + if(strncmp(df,"e",1) == 0) + strftime(tbuf, sizeof(tbuf), "%d%b%Y", t); + if(strncmp(df,"w",1) == 0) { + strcpy(IndexTree,"file"); + strftime(tbuf, sizeof(tbuf), "%Y.%U", t); + } - if(strncmp(df,"u",1)==0) - strftime(dia, sizeof(dia), "%m/%d/%Y", t); - else - strftime(dia, sizeof(dia), "%d/%m/%Y", t); - sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec); - } else { - strcpy(wtemp,data+1); - if (getword_multisep(data,sizeof(data),wtemp,':')<0){ - printf("SARG: Maybe you have a broken date in your %s file.\n",arq); - exit(1); - } - if (getword_multisep(hora,sizeof(hora),wtemp,' ')<0){ - printf("SARG: Maybe you have a broken date in your %s file.\n",arq); - exit(1); - } - if (getword_multisep(dia,sizeof(dia),data,'/')<0){ - printf("SARG: Maybe you have a broken date in your %s file.\n",arq); - exit(1); - } - if (getword_multisep(mes,sizeof(mes),data,'/')<0){ - printf("SARG: Maybe you have a broken date in your %s file.\n",arq); - exit(1); - } - if (getword_multisep(ano,sizeof(ano),data,'/')<0){ - printf("SARG: Maybe you have a broken date in your %s file.\n",arq); - exit(1); - } + strftime(wdata, sizeof(wdata), "%Y%m%d", t); + idata=atoi(wdata); - if(strcmp(df,"u") == 0) - snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia); - if(strcmp(df,"e") == 0) - snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano); - builddia(dia,mes,ano,df,wdata); - idata=atoi(wdata); + if(strncmp(df,"u",1)==0) + strftime(dia, sizeof(dia), "%m/%d/%Y", t); + else + strftime(dia, sizeof(dia), "%d/%m/%Y", t); + sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec); + } else if(ilf==ILF_Common || ilf==ILF_Isa) { + strcpy(wtemp,data+1); + if (getword_multisep(data,sizeof(data),wtemp,':')<0){ + printf("SARG: Maybe you have a broken date in your %s file.\n",arq); + exit(1); + } + if (getword_multisep(hora,sizeof(hora),wtemp,' ')<0){ + printf("SARG: Maybe you have a broken date in your %s file.\n",arq); + exit(1); + } + if (getword_multisep(dia,sizeof(dia),data,'/')<0){ + printf("SARG: Maybe you have a broken date in your %s file.\n",arq); + exit(1); } - } else { + if (getword_multisep(mes,sizeof(mes),data,'/')<0){ + printf("SARG: Maybe you have a broken date in your %s file.\n",arq); + exit(1); + } + if (getword_multisep(ano,sizeof(ano),data,'/')<0){ + printf("SARG: Maybe you have a broken date in your %s file.\n",arq); + exit(1); + } + + if(strcmp(df,"u") == 0) + snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia); + if(strcmp(df,"e") == 0) + snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano); + builddia(dia,mes,ano,df,wdata); + idata=atoi(wdata); + } else if (ilf==ILF_Sarg) { if (getword_multisep(mes,sizeof(mes),data,'/')<0){ printf("SARG: Maybe you have a broken date in your %s file.\n",arq); exit(1); @@ -1357,7 +1370,7 @@ int main(int argc,char *argv[]) } fputs (bufz, fp_Write_User); - if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog) + if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) fputs(bufz,fp_log); totregsg++; @@ -1386,7 +1399,7 @@ int main(int argc,char *argv[]) } } - if((!totper || idata0 && ilf_count[ILF_Squid]>0) debuga("%s",text[12]); - if((common_log) && (!squid_log)) + if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0) debuga("%s",text[13]); - if((!common_log) && (squid_log)) + if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0) debuga("%s",text[14]); - if(sarglog) + if(ilf_count[ILF_Sarg]>0) debuga("%s",text[124]); - if((!common_log) && (!squid_log) && (!sarglog) && (!isalog)) { + if(totalcount==0) { if(!totregsg) { fprintf(stderr, "SARG: %s\n",text[16]); fprintf(stderr, "SARG: %s\n",text[21]); @@ -1481,7 +1500,7 @@ int main(int argc,char *argv[]) exit(0); } - if(date[0] == '\0' && !sarglog) { + if(date[0] == '\0' && ilf_count[ILF_Sarg]==0) { strcat(period,tbuf); strcat(per_hour,tbuf2); } @@ -1501,7 +1520,7 @@ int main(int argc,char *argv[]) if(fp_authfail) fclose(fp_authfail); - if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog) { + if(fp_log != NULL) { fclose(fp_log); strcpy(val1,period); if (getword_multisep(val2,sizeof(val2),val1,'-')<0){ diff --git a/sarg.1 b/sarg.1 index f3656a8..f61ab25 100644 --- a/sarg.1 +++ b/sarg.1 @@ -94,8 +94,10 @@ Uses .IR "filename" as the input log. This option can be repeated up to 255 times to read multiple files. The files must be listed in chronological order from newest to eldest. If the files end with the extension .IR ".gz" ", " ".bz2" " or " ".Z" -they are decompressed, read and then, in the case of -.IR ".Z" ", recompressed." +they are decompressed. +If the file name is just +.I - +, the log file is read from standard input. In that case, it cannot be compressed. .TP .B \-n Enables ip address resolution.