git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Input log file type detection partly rewritten to clearly distinguish which type is processed where.
author Frédéric Marchal <fmarchal@users.sourceforge.net>
Thu, 3 Dec 2009 21:13:59 +0000 (21:13 +0000)
committer Frédéric Marchal <fmarchal@users.sourceforge.net>
Thu, 3 Dec 2009 21:13:59 +0000 (21:13 +0000)
Read the input log file from standard input if log file name is -.

ChangeLog
log.c
sarg.1

index 431968cf47902415fe2868e690cfb68eff257c5a..dc0770d37e5dc8ab3e109b011aabf10e4b4c656d 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,12 @@
 SARG ChangeLog
 
 Dec-03-2009    Version 2.2.7
-               - extra compile and run time protection (FORTIFY_SOURCE) fixed in configure.
-               - invalid sort field separator in useragent.c
+               - Extra compile and run time protection (FORTIFY_SOURCE) fixed in configure.
+               - Invalid sort field separator in useragent.c
                        Thanks to Maxim Britov <maxim@office.modum.by>
+               - Use tabulations as columns separator in intermediary files to avoid problems when a field of the log contains a space.
+               - Input log file type detection partly rewritten to clearly distinguish which type is processed where.
+               - Read the input log file from standard input if log file name is -.
 
 Oct-14-2009 Version 2.2.6
                - Protection against buffer overflows in getword and friends and report the origin of the error instead of always blaming access.log.
diff --git a/log.c b/log.c
index 3a5c111e9f6670dd56dfb29be0c53c058d70f7ec..0d15eda59bba1e820a782baa0b9a7b81fb619063 100644 (file)
--- a/log.c
+++ b/log.c
@@ -56,6 +56,14 @@ int main(int argc,char *argv[])
       ISACOL_Status,
       ISACOL_Last //last entry of the list !
    };
+   enum InputLogFormat {
+      ILF_Unknown,
+      ILF_Squid,
+      ILF_Common,
+      ILF_Sarg,
+      ILF_Isa,
+      ILF_Last //last entry of the list !
+   };
 
    FILE *fp_in = NULL, *fp_denied=NULL, *fp_authfail=NULL, *fp_log=NULL;
 
@@ -100,9 +108,8 @@ int main(int argc,char *argv[])
    char *str;
    char bufz[MAXLEN];
    char bufy[MAXLEN];
-   int  common;
-   int  common_log=0;
-   int  squid_log=0;
+   enum InputLogFormat ilf;
+   int ilf_count[ILF_Last];
    int  ch;
    int  x, l;
    int  errflg=0;
@@ -116,11 +123,12 @@ int main(int argc,char *argv[])
    int  iarq=0;
    int  exstring=0;
    int isa_ncols=0,isa_cols[ISACOL_Last];
+   int from_stdin;
    long totregsl=0;
    long totregsg=0;
    long totregsx=0;
    long totper=0;
-   long int  max_elapsed=0;
+   long int max_elapsed=0;
    time_t tt;
    struct tm *t;
    off_t recs1=0;
@@ -150,6 +158,7 @@ int main(int argc,char *argv[])
    SquidGuardLogFormat[0]='\0';
    SquidGuardLogAlternate[0]='\0';
    arq[0]='\0';
+   for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
 
    strcpy(AccessLog,"/var/log/squid/access.log");
    sprintf(ExcludeCodes,"%s/exclude_codes",SYSCONFDIR);
@@ -270,8 +279,6 @@ int main(int argc,char *argv[])
    color1=0;
    color2=0;
    color3=0;
-   sarglog=0;
-   isalog=0;
    dotinuser=0;
    realt=0;
    realtime_refresh=3;
@@ -717,55 +724,25 @@ int main(int argc,char *argv[])
       iarq++;
 
       strcpy(arqtt,arq);
-      decomp(arq,zip,tmp);
-      if(debug)
-         debuga("%s: %s",text[7],arq);
 
-      if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
-         fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq,strerror(errno));
-         exit(1);
-      }
-      if (!fgets(bufz,sizeof(bufz),fp_in)) {
-         if (feof(fp_in)) {
-            fprintf(stderr,"SARG: %s %s\n",text[141],arq);
-            if (narq>0) fprintf(stderr,"SARG: %s\n",text[142]);
-            fclose(fp_in);
-            continue;
-         }
-         fprintf(stderr,"SARG: read error in %s\n",arq);
-         exit(1);
-      }
-      if(!isalog && strncmp(bufz,"#Software: Mic",14) == 0) {
-         fixendofline(bufz);
-         debuga("%s: %s",text[143],bufz);
-         isalog++;
-      }
-
-      if(strncmp(bufz,"*** SARG Log ***",16) == 0) {
-         if (getword(val2,sizeof(val2),arqtt,'-')<0 || getword(val2,sizeof(val2),arqtt,'_')<0 ||
-            getword(val3,sizeof(val3),arqtt,'-')<0 || getword(val3,sizeof(val3),arqtt,'_')<0) {
-            printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
-            exit(1);
-         }
-         sprintf(period,"%s-%s",val2,val3);
-         sarglog=1;
-      } else fseek(fp_in, 0, SEEK_SET);
-
-      if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog) {
-         if(access(ParsedOutputLog,R_OK) != 0) {
-            sprintf(csort,"%s",ParsedOutputLog);
-            my_mkdir(csort);
-         }
-         sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
-         if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
-            fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq_log,strerror(errno));
+      if(strcmp(arq,"-")==0) {
+         if(debug)
+            debuga("%s: %s",text[7],"stdin");
+         fp_in=stdin;
+         from_stdin=1;
+      } else {
+         decomp(arq,zip,tmp);
+         if(debug)
+            debuga("%s: %s",text[7],arq);
+         if((fp_in=MY_FOPEN(arq,"r"))==NULL) {
+            fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq,strerror(errno));
             exit(1);
          }
-         fputs("*** SARG Log ***\n",fp_log);
+         from_stdin=0;
       }
-
+      ilf=ILF_Unknown;
       // pre-Read the file only if I have to show stats
-      if(bool_ShowReadStatistics) {
+      if(bool_ShowReadStatistics && !from_stdin) {
          rewind(fp_in);
          recs1=0;
          recs2=0;
@@ -777,8 +754,44 @@ int main(int argc,char *argv[])
       }
 
       while(fgets(bufz,sizeof(bufz),fp_in)!=NULL) {
+
+         if (ilf==ILF_Unknown) {
+            if(strncmp(bufz,"#Software: Mic",14) == 0) {
+               fixendofline(bufz);
+               debuga("%s: %s",text[143],bufz);
+               ilf=ILF_Isa;
+               ilf_count[ilf]++;
+               continue;
+            }
+
+            if(strncmp(bufz,"*** SARG Log ***",16) == 0) {
+               if (getword(val2,sizeof(val2),arqtt,'-')<0 || getword(val2,sizeof(val2),arqtt,'_')<0 ||
+                   getword(val3,sizeof(val3),arqtt,'-')<0 || getword(val3,sizeof(val3),arqtt,'_')<0) {
+                  printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
+                  exit(1);
+               }
+               sprintf(period,"%s-%s",val2,val3);
+               ilf=ILF_Sarg;
+               ilf_count[ilf]++;
+               continue;
+            }
+         }
+
+         if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg) {
+            if(access(ParsedOutputLog,R_OK) != 0) {
+               sprintf(csort,"%s",ParsedOutputLog);
+               my_mkdir(csort);
+            }
+            sprintf(arq_log,"%s/sarg_temp.log",ParsedOutputLog);
+            if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
+               fprintf(stderr, "%s: (log) %s: %s - %s\n",argv[0],text[8],arq_log,strerror(errno));
+               exit(1);
+            }
+            fputs("*** SARG Log ***\n",fp_log);
+         }
+
          recs2++;
-         if( bool_ShowReadStatistics && ! --OutputNonZero) {
+         if( bool_ShowReadStatistics && !from_stdin && ! --OutputNonZero) {
            perc = recs2 * 100 ;
            perc = perc / recs1 ;
            printf("SARG: Records in file: " OFFSET_STRING", reading: %3.2f%%\r",recs1,perc);
@@ -827,11 +840,10 @@ int main(int argc,char *argv[])
             *str = '\0';          /* strip \n */
 
          totregsl++;
-         common=0;
          if(debugm)
             printf("BUF=%s\n",bufz);
 
-         if(!sarglog && !isalog) {
+         if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
             if (getword(data,sizeof(data),bufz,' ')<0) {
                printf("SARG: Maybe you have a broken record or garbage in your access.log file.\n");
                exit(1);
@@ -872,12 +884,12 @@ int main(int argc,char *argv[])
                   if(strcmp(tam,"\0") == 0)
                      strcpy(tam,"0");
 
-                  common++;
-                  common_log=1;
+                  ilf=ILF_Common;
+                  ilf_count[ilf]++;
                }
             }
 
-            if(!common) {
+            if(ilf==ILF_Unknown || ilf==ILF_Squid) {
                if (getword(elap,sizeof(elap),bufz,' ')<0) {
                   printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
                   exit(1);
@@ -916,9 +928,11 @@ int main(int argc,char *argv[])
                   printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
                   exit(1);
                }
-               squid_log=1;
+               ilf=ILF_Squid;
+               ilf_count[ilf]++;
             }
-         } else if(!isalog) {
+         }
+         if (ilf==ILF_Sarg) {
             if (getword(data,sizeof(data),bufz,' ')<0){
                printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
                exit(1);
@@ -955,7 +969,8 @@ int main(int argc,char *argv[])
                printf("SARG: Maybe you have a broken record or garbage in your %s file.\n",arq);
                exit(1);
             }
-         } else if(isalog) {
+         }
+         if (ilf==ILF_Isa) {
             if (bufz[0] == '#') {
                int ncols,cols[ISACOL_Last];
 
@@ -1165,60 +1180,58 @@ int main(int argc,char *argv[])
             }
          }
 
-         if(!sarglog) {
-            if(!common && !isalog) {
-               tt=atoi(data);
-               t=localtime(&tt);
-
-               strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
-               if(strncmp(df,"u",1) == 0)
-                  strftime(tbuf, sizeof(tbuf), "%Y%b%d", t);
-               if(strncmp(df,"e",1) == 0)
-                  strftime(tbuf, sizeof(tbuf), "%d%b%Y", t);
-               if(strncmp(df,"w",1) == 0) {
-                  strcpy(IndexTree,"file");
-                  strftime(tbuf, sizeof(tbuf), "%Y.%U", t);
-               }
-
-               strftime(wdata, sizeof(wdata), "%Y%m%d", t);
-               idata=atoi(wdata);
+         if(ilf==ILF_Squid) {
+            tt=atoi(data);
+            t=localtime(&tt);
+
+            strftime(tbuf2, sizeof(tbuf2), "%H%M", t);
+            if(strncmp(df,"u",1) == 0)
+               strftime(tbuf, sizeof(tbuf), "%Y%b%d", t);
+            if(strncmp(df,"e",1) == 0)
+               strftime(tbuf, sizeof(tbuf), "%d%b%Y", t);
+            if(strncmp(df,"w",1) == 0) {
+               strcpy(IndexTree,"file");
+               strftime(tbuf, sizeof(tbuf), "%Y.%U", t);
+            }
 
-               if(strncmp(df,"u",1)==0)
-                  strftime(dia, sizeof(dia), "%m/%d/%Y", t);
-               else
-                  strftime(dia, sizeof(dia), "%d/%m/%Y", t);
-               sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
-            } else {
-               strcpy(wtemp,data+1);
-               if (getword_multisep(data,sizeof(data),wtemp,':')<0){
-                  printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
-                  exit(1);
-               }
-               if (getword_multisep(hora,sizeof(hora),wtemp,' ')<0){
-                  printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
-                  exit(1);
-               }
-               if (getword_multisep(dia,sizeof(dia),data,'/')<0){
-                  printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
-                  exit(1);
-               }
-               if (getword_multisep(mes,sizeof(mes),data,'/')<0){
-                  printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
-                  exit(1);
-               }
-               if (getword_multisep(ano,sizeof(ano),data,'/')<0){
-                  printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
-                  exit(1);
-               }
+            strftime(wdata, sizeof(wdata), "%Y%m%d", t);
+            idata=atoi(wdata);
 
-               if(strcmp(df,"u") == 0)
-                  snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia);
-               if(strcmp(df,"e") == 0)
-                  snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano);
-               builddia(dia,mes,ano,df,wdata);
-               idata=atoi(wdata);
+            if(strncmp(df,"u",1)==0)
+               strftime(dia, sizeof(dia), "%m/%d/%Y", t);
+            else
+               strftime(dia, sizeof(dia), "%d/%m/%Y", t);
+            sprintf(hora,"%02d:%02d:%02d",t->tm_hour,t->tm_min,t->tm_sec);
+         } else if(ilf==ILF_Common || ilf==ILF_Isa) {
+            strcpy(wtemp,data+1);
+            if (getword_multisep(data,sizeof(data),wtemp,':')<0){
+               printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
+               exit(1);
+            }
+            if (getword_multisep(hora,sizeof(hora),wtemp,' ')<0){
+               printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
+               exit(1);
+            }
+            if (getword_multisep(dia,sizeof(dia),data,'/')<0){
+               printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
+               exit(1);
             }
-         } else {
+            if (getword_multisep(mes,sizeof(mes),data,'/')<0){
+               printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
+               exit(1);
+            }
+            if (getword_multisep(ano,sizeof(ano),data,'/')<0){
+               printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
+               exit(1);
+            }
+
+            if(strcmp(df,"u") == 0)
+               snprintf(tbuf,sizeof(tbuf),"%s%s%s",ano,mes,dia);
+            if(strcmp(df,"e") == 0)
+               snprintf(tbuf,sizeof(tbuf),"%s%s%s",dia,mes,ano);
+            builddia(dia,mes,ano,df,wdata);
+            idata=atoi(wdata);
+         } else if (ilf==ILF_Sarg) {
             if (getword_multisep(mes,sizeof(mes),data,'/')<0){
                printf("SARG: Maybe you have a broken date in your %s file.\n",arq);
                exit(1);
@@ -1357,7 +1370,7 @@ int main(int argc,char *argv[])
                }
                fputs (bufz, fp_Write_User);
 
-               if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog)
+               if(strcmp(ParsedOutputLog, "no") != 0 && ilf!=ILF_Sarg)
                   fputs(bufz,fp_log);
 
                totregsg++;
@@ -1386,7 +1399,7 @@ int main(int argc,char *argv[])
                   }
                }
 
-               if((!totper || idata<mindate) && !sarglog){
+               if((!totper || idata<mindate) && ilf!=ILF_Sarg){
                   totper++;
                   mindate=idata;
                   sprintf(period,"%s-",tbuf);
@@ -1413,9 +1426,11 @@ int main(int argc,char *argv[])
             }
          }
       }
-      fclose(fp_in);
-      if( bool_ShowReadStatistics )
-        printf("SARG: Records in file: " OFFSET_STRING ", reading: %3.2f%%\n",recs1, (float) 100 );
+      if (!from_stdin) {
+         fclose(fp_in);
+         if( bool_ShowReadStatistics )
+            printf("SARG: Records in file: " OFFSET_STRING ", reading: %3.2f%%\n",recs1, (float) 100 );
+      }
    }
 
    if ( fp_Download_Unsort )
@@ -1425,21 +1440,25 @@ int main(int argc,char *argv[])
      fclose (fp_Write_User);
 
    if(debug) {
+      int totalcount=0;
+
+      for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
+
       debuga("   %s: %ld, %s: %ld, %s: %ld",text[10],totregsl,text[11],totregsg,text[68],totregsx);
 
-      if((common_log) && (squid_log))
+      if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
          debuga("%s",text[12]);
 
-      if((common_log) && (!squid_log))
+      if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
          debuga("%s",text[13]);
 
-      if((!common_log) && (squid_log))
+      if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
          debuga("%s",text[14]);
 
-      if(sarglog)
+      if(ilf_count[ILF_Sarg]>0)
          debuga("%s",text[124]);
 
-      if((!common_log) && (!squid_log) && (!sarglog) && (!isalog)) {
+      if(totalcount==0) {
          if(!totregsg) {
             fprintf(stderr, "SARG: %s\n",text[16]);
             fprintf(stderr, "SARG: %s\n",text[21]);
@@ -1481,7 +1500,7 @@ int main(int argc,char *argv[])
       exit(0);
    }
 
-   if(date[0] == '\0' && !sarglog) {
+   if(date[0] == '\0' && ilf_count[ILF_Sarg]==0) {
       strcat(period,tbuf);
       strcat(per_hour,tbuf2);
    }
@@ -1501,7 +1520,7 @@ int main(int argc,char *argv[])
    if(fp_authfail)
       fclose(fp_authfail);
 
-   if(strcmp(ParsedOutputLog, "no") != 0 && !sarglog) {
+   if(fp_log != NULL) {
       fclose(fp_log);
       strcpy(val1,period);
       if (getword_multisep(val2,sizeof(val2),val1,'-')<0){
diff --git a/sarg.1 b/sarg.1
index f3656a8192631b8254ba8cc4a13a3b97f86f4004..f61ab2523f26997d38db1fb00c06552389e8dbd1 100644 (file)
--- a/sarg.1
+++ b/sarg.1
@@ -94,8 +94,10 @@ Uses
 .IR "filename"
 as the input log. This option can be repeated up to 255 times to read multiple files. The files must be listed in chronological order from newest to eldest. If the files end with the extension
 .IR ".gz" ", " ".bz2" " or " ".Z"
-they are decompressed, read and then, in the case of
-.IR ".Z" ", recompressed."
+they are decompressed.
+If the file name is just
+.I -
+, the log file is read from standard input. In that case, it cannot be compressed.
 .TP
 .B \-n
 Enables ip address resolution.