]> git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Modular reading of some input log formats
authorFrédéric Marchal <fmarchal@users.sourceforge.net>
Sat, 25 Aug 2012 06:45:20 +0000 (08:45 +0200)
committerFrédéric Marchal <fmarchal@users.sourceforge.net>
Sat, 25 Aug 2012 06:45:20 +0000 (08:45 +0200)
Currently supported log formats are squid, common and sarg.

CMakeLists.txt
Makefile.in
include/readlog.h
readlog.c
readlog_common.c [new file with mode: 0644]
readlog_extlog.c [new file with mode: 0644]
readlog_sarg.c [new file with mode: 0644]
readlog_squid.c [new file with mode: 0644]

index a9ea2d94dfa6da3a86eddd0c1bb0eb36fb7e05e7..0e3d39e1d6d88a46cb6a0bad411389b9faab6085 100755 (executable)
@@ -52,7 +52,8 @@ SET(SRC util.c log.c report.c topuser.c email.c sort.c html.c
        smartfilter.c denied.c authfail.c charset.c dichotomic.c
        redirector.c auth.c download.c grepday.c ip2name_exec.c
        dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c
-       usertab.c userinfo.c longline.c url.c readlog.c)
+       usertab.c userinfo.c longline.c url.c readlog.c readlog_squid.c
+       readlog_sarg.c readlog_extlog.c readlog_common.c)
 
 FOREACH(f ${SRC})
    ADD_FILE_DEPENDENCIES(${f} ${CMAKE_BINARY_DIR}/config.h ${CMAKE_SOURCE_DIR}/include/conf.h ${CMAKE_SOURCE_DIR}/include/info.h ${CMAKE_SOURCE_DIR}/include/defs.h)
index 3bbf0b5a85ad97d8d569605db877b8479511492e..b6a7b1ba5627b9167efc456e6b1fe521dcbd63b2 100644 (file)
@@ -38,7 +38,8 @@ SRCS = util.c log.c report.c topuser.c email.c sort.c html.c \
        smartfilter.c denied.c authfail.c charset.c dichotomic.c \
        redirector.c auth.c download.c grepday.c ip2name_exec.c \
        dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c \
-       usertab.c userinfo.c longline.c url.c readlog.c
+       usertab.c userinfo.c longline.c url.c readlog.c readlog_squid.c \
+       readlog_sarg.c readlog_extlog.c readlog_common.c
 
 OBJS = $(SRCS:.c=.o)
 
index 2909a491680fdd0717b72d0e41deaba6269b45a7..f895064586e7b89cb8d4fdb8ecedcb5d0dc3c1f3 100644 (file)
@@ -8,8 +8,12 @@ enum ReadLogReturnCodeEnum
 {
        //! Line successfuly read.
        RLRC_NoError,
+       //! Line is known and should be ignored.
+       RLRC_Ignore,
        //! Unknown line format.
        RLRC_Unknown,
+       //! Error encountered during the parsing of the file.
+       RLRC_InternalError,
        
        RLRC_LastRetCode //!< last entry of the list.
 };
@@ -21,11 +25,15 @@ struct ReadLogStruct
 {
        //! The time corresponding to the entry.
        struct tm *EntryTime;
-       //! The IP address connecting to internet
+       //! The IP address connecting to internet.
        char *Ip;
        //! The user's name.
        char *User;
-       //! The URL of the visited site.
+       /*!
+       The URL of the visited site.
+       
+       The pointer may be NULL if the URL doesn't exist in the log file.
+       */
        char *Url;
        //! Time necessary to process the user's request.
        long int ElapsedTime;
@@ -35,4 +43,17 @@ struct ReadLogStruct
        char *HttpCode;
 };
 
+/*!
+\brief Functions to read a log file in one given log format.
+*/
+struct ReadLogProcessStruct
+{
+       //! The name of the log format handled by this object (displayed when the format is detected).
+       const char *Name;
+       //! Inform the module about the reading of a new file. ReadLogFile() skips the call when this pointer is NULL.
+       void (*NewFile)(const char *FileName);
+       //! Function to read one entry from the log. It returns a status from ReadLogReturnCodeEnum.
+       enum ReadLogReturnCodeEnum (*ReadEntry)(char *Line,struct ReadLogStruct *Entry);
+};
+
 #endif //READLOG_HEADER
index cdc0a3109436761624c206f0eeb87a8b1c36932c..1de337d3214d25fa4776c17ffa3baea98b11abdb 100644 (file)
--- a/readlog.c
+++ b/readlog.c
@@ -43,6 +43,20 @@ numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
 
 extern char *userfile;
 
+extern const struct ReadLogProcessStruct ReadSquidLog;
+extern const struct ReadLogProcessStruct ReadCommonLog;
+extern const struct ReadLogProcessStruct ReadSargLog;
+extern const struct ReadLogProcessStruct ReadExtLog;
+
+//! The list of the supported log formats. Both the table and the objects it points to are read-only.
+static const struct ReadLogProcessStruct * const LogFormats[]=
+{
+       &ReadSquidLog,
+       &ReadCommonLog,
+       &ReadSargLog,
+       &ReadExtLog
+};
+
 /*!
 Read the log files.
 
@@ -53,39 +67,17 @@ Read the log files.
 */
 int ReadLogFile(struct ReadLogDataStruct *Filter)
 {
-       enum isa_col_id {
-               ISACOL_Ip,
-               ISACOL_UserName,
-               ISACOL_Date,
-               ISACOL_Time,
-               ISACOL_TimeTaken,
-               ISACOL_Bytes,
-               ISACOL_Uri,
-               ISACOL_Status,
-               ISACOL_Last //last entry of the list !
-       };
-       enum InputLogFormat {
-               ILF_Unknown,
-               ILF_Squid,
-               ILF_Common,
-               ILF_Sarg,
-               ILF_Isa,
-               ILF_Last //last entry of the list !
-       };
-
-       enum InputLogFormat ilf;
-       int ilf_count[ILF_Last];
        longline line;
        char *linebuf;
        char *str;
        char arq_log[255];
-       char fun[MAXLEN];
-       char elap[255];
+       //char fun[MAXLEN];
+       //char elap[255];
        char user[MAX_USER_LEN];
-       char data[255];
-       char ip[60];
+       //char data[255];
+       //char ip[60];
        char hora[30];
-       char mes[30];
+       //char mes[30];
        char tbuf2[128];
        char dia[128]="";
        char wuser[MAXLEN];
@@ -100,7 +92,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
        int blen;
        int OutputNonZero = REPORT_EVERY_X_LINES ;
        int idata=0;
-       int isa_ncols=0,isa_cols[ISACOL_Last];
+       //int isa_ncols=0,isa_cols[ISACOL_Last];
        int x;
        int hmr;
        int nopen;
@@ -108,12 +100,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
        int mindate=0;
        int maxdate=0;
        int cstatus;
+       int current_format_idx;
+       int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
        unsigned long int recs1=0UL;
        unsigned long int recs2=0UL;
        long int totregsl=0;
        long int totregsg=0;
        long int totregsx=0;
-       long long int iyear, imonth, iday;
+       //long long int iyear, imonth, iday;
        FILE *fp_in=NULL;
        FILE *fp_log=NULL;
        FILE *fp_Download_Unsort=NULL;
@@ -122,18 +116,20 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
        bool download_flag=false;
        bool id_is_ip;
        bool totper=false;
+       enum ReadLogReturnCodeEnum log_entry_status;
        struct stat logstat;
        struct getwordstruct gwarea;
-       struct tm tt;
+       //struct tm tt;
        struct userfilestruct *prev_ufile;
        struct userinfostruct *uinfo;
        struct userfilestruct *first_user_file=NULL;
        struct userfilestruct *ufile;
        struct userfilestruct *ufile1;
        struct ReadLogStruct log_entry;
-       time_t tnum;
+       const struct ReadLogProcessStruct *current_format=NULL;
+       //time_t tnum;
 
-       for (ilf=0 ; ilf<ILF_Last ; ilf++) ilf_count[ilf]=0;
+       for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
        tmp3[0]='\0';
        start_hour[0]='\0';
        first_user_file=NULL;
@@ -153,7 +149,13 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
        for (iarq=0 ; iarq<NAccessLog ; iarq++) {
                arq=AccessLog[iarq];
 
-               if(strcmp(arq,"-")==0) {
+               current_format=NULL;
+               current_format_idx=-1;
+               for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
+                       if (LogFormats[x]->NewFile)
+                               LogFormats[x]->NewFile(arq);
+
+               if (arq[0]=='-' && arq[1]=='\0') {
                        if(debug)
                                debuga(_("Reading access log file: from stdin\n"));
                        fp_in=stdin;
@@ -179,7 +181,6 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                        from_stdin=false;
                }
 
-               ilf=ILF_Unknown;
                download_flag=false;
 
                recs1=0UL;
@@ -215,11 +216,13 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                while ((linebuf=longline_read(fp_in,line))!=NULL) {
                        blen=strlen(linebuf);
 
+#if 0
                        if (ilf==ILF_Unknown) {
                                if(strncmp(linebuf,"#Software: Mic",14) == 0) {
-                                       fixendofline(linebuf);
-                                       if (debug)
+                                       if (debug) {
+                                               fixendofline(linebuf);
                                                debuga(_("Log is from Microsoft ISA: %s\n"),linebuf);
+                                       }
                                        ilf=ILF_Isa;
                                        ilf_count[ilf]++;
                                        continue;
@@ -235,21 +238,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                        continue;
                                }
                        }
-
-                       if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) {
-                               if(access(ParsedOutputLog,R_OK) != 0) {
-                                       my_mkdir(ParsedOutputLog);
-                               }
-                               if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
-                                       debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
-                                       exit(EXIT_FAILURE);
-                               }
-                               if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
-                                       debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
-                                       exit(EXIT_FAILURE);
-                               }
-                               fputs("*** SARG Log ***\n",fp_log);
-                       }
+#endif
 
                        recs2++;
                        if (ShowReadStatistics && --OutputNonZero<=0) {
@@ -263,9 +252,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                fflush (stdout);
                                OutputNonZero = REPORT_EVERY_X_LINES ;
                        }
-                       if(blen < 58) continue;
-                       if(strstr(linebuf,"HTTP/0.0") != 0) continue;
-                       if(strstr(linebuf,"logfile turned over") != 0) continue;
+                       
+                       /*
+                       The following checks are retained here as I don't know to
+                       what format they apply. They date back to pre 2.4 versions.
+                       */
+                       //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line
+                       if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query
+                       if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog
                        if(linebuf[0] == ' ') continue;
 
                        // exclude_string
@@ -291,7 +285,62 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                        if(debugm)
                                printf("BUF=%s\n",linebuf);
 
+                       // process the line
+                       log_entry_status=RLRC_Unknown;
                        memset(&log_entry,0,sizeof(log_entry));
+                       if (current_format) {
+                               log_entry_status=current_format->ReadEntry(linebuf,&log_entry);
+                       }
+                       
+                       // find out what line format to use
+                       if (log_entry_status==RLRC_Unknown) {
+                               // try every other format in turn; x is only a valid index if one of them recognized the line
+                               for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) {
+                                       if (LogFormats[x]==current_format) continue;
+                                       memset(&log_entry,0,sizeof(log_entry));
+                                       log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry);
+                                       if (log_entry_status!=RLRC_Unknown) break;
+                               }
+                               if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) {
+                                       debuga(_("Unknown line format found in input log file %s\n"),arq);
+                                       exit(EXIT_FAILURE);
+                               }
+                               current_format=LogFormats[x];
+                               current_format_idx=x;
+                               if (debugz) {
+                                       /* TRANSLATORS: The argument is the log format name as translated by you. */
+                                       debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq);
+                               }
+                       }
+                       if (log_entry_status==RLRC_Ignore) {
+                               continue;
+                       }
+                       if (current_format_idx<0 || current_format==NULL) {
+                               debuga(_("Sarg couldn't determine the format of the input log file %s\n"),arq);
+                               exit(EXIT_FAILURE);
+                       }
+                       if (log_entry_status==RLRC_InternalError) {
+                               debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq);
+                               exit(EXIT_FAILURE);
+                       }
+                       format_count[current_format_idx]++;
+
+                       if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) {
+                               if(access(ParsedOutputLog,R_OK) != 0) {
+                                       my_mkdir(ParsedOutputLog);
+                               }
+                               if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) {
+                                       debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog);
+                                       exit(EXIT_FAILURE);
+                               }
+                               if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) {
+                                       debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno));
+                                       exit(EXIT_FAILURE);
+                               }
+                               fputs("*** SARG Log ***\n",fp_log);
+                       }
+
+#if 0
                        if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) {
                                getword_start(&gwarea,linebuf);
                                if (getword(data,sizeof(data),&gwarea,' ')<0) {
@@ -619,6 +668,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                }
                                log_entry.EntryTime=&tt;
                        }
+#endif
                        if (log_entry.EntryTime==NULL) {
                                debuga(_("Unknown input log file format: no time\n"));
                                break;
@@ -679,7 +729,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                        for (str=log_entry.HttpCode ; *str ; str++)
                                if (*str=='\t') *str=' ';
 
-                       if(ilf!=ILF_Sarg) {
+                       if (current_format!=&ReadSargLog) {
                                /*
                                The full URL is not saved in sarg log. There is no point in testing the URL to detect
                                a downloaded file.
@@ -757,8 +807,8 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                }
                        }
 
-                       if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 || 
-                          strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0)
+                       if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' ||
+                          log_entry.User[0]==' ' || log_entry.User[0]==':')))
                                continue;
 
                        if (log_entry.DataSize<0) log_entry.DataSize=0;
@@ -798,10 +848,10 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                        first_user_file=ufile;
                                }
                        }
-       #ifdef ENABLE_DOUBLE_CHECK_DATA
+#ifdef ENABLE_DOUBLE_CHECK_DATA
                        ufile->user->nbytes+=log_entry.DataSize;
                        ufile->user->elap+=log_entry.ElapsedTime;
-       #endif
+#endif
 
                        if (ufile->file==NULL) {
                                if (nopen>=maxopenfiles) {
@@ -832,13 +882,18 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                        strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime);
                        strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime);
 
-                       if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
+                       if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
+                                   log_entry.Ip,url,(uint64_t)log_entry.DataSize,
+                                   log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
                                debuga(_("Write error in the log file of user %s\n"),log_entry.User);
                                exit(EXIT_FAILURE);
                        }
 
-                       if(fp_log && ilf!=ILF_Sarg)
-                               fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
+                       if (fp_log && current_format!=&ReadSargLog) {
+                               fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
+                                       log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
+                                       log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
+                       }
 
                        totregsg++;
 
@@ -851,13 +906,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                                exit (1);
                                        }
                                }
-                               fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url);
+                               fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,
+                                       log_entry.User,log_entry.Ip,download_url);
                        }
 
                        denied_write(&log_entry);
                        authfail_write(&log_entry);
 
-                       if (ilf!=ILF_Sarg) {
+                       if (current_format!=&ReadSargLog) {
                                if(!totper || idata<mindate){
                                        mindate=idata;
                                        memcpy(&period.start,log_entry.EntryTime,sizeof(*log_entry.EntryTime));
@@ -876,7 +932,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
                                printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
                                printf("DATE=\t%s\n",dia);
                                printf("TIME=\t%s\n",hora);
-                               printf("FUNC=\t%s\n",fun);
+                               //printf("FUNC=\t%s\n",fun);
                                printf("URL=\t%s\n",url);
                                printf("CODE=\t%s\n",log_entry.HttpCode);
                                printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
@@ -953,25 +1009,21 @@ int ReadLogFile(struct ReadLogDataStruct *Filter)
 
                debuga(_("   Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
 
-               for (ilf=0 ; ilf<ILF_Last ; ilf++) totalcount+=ilf_count[ilf];
-
-               if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]>0)
-                       debuga(_("Log with mixed records format (squid and common log)\n"));
-
-               if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0)
-                       debuga(_("Common log format\n"));
-
-               if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0)
-                       debuga(_("Squid log format\n"));
-
-               if(ilf_count[ILF_Sarg]>0)
-                       debuga(_("Sarg log format\n"));
+               for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
+                       if (format_count[x]>0) {
+                               /* TRANSLATORS: It displays the number of lines found in the input log files
+                               * for each supported log format. The log format name is the %s and is a string
+                               * you translate somewhere else. */
+                               debuga(_("%s: %d entries\n"),_(LogFormats[x]->Name),format_count[x]);
+                               totalcount+=format_count[x];
+                       }
+               }
 
-               if(totalcount==0 && totregsg)
+               if (totalcount==0 && totregsg)
                        debuga(_("Log with invalid format\n"));
        }
 
-       if(debugz){
+       if(debugz) {
                debugaz(_("date=%s\n"),dia);
                debugaz(_("period=%s\n"),period.text);
        }
diff --git a/readlog_common.c b/readlog_common.c
new file mode 100644 (file)
index 0000000..2a53130
--- /dev/null
@@ -0,0 +1,184 @@
+/*
+ * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
+ *                                                            1998, 2012
+ *
+ * SARG donations:
+ *      please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ *     http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+A new file is being read. The name of the file is \a FileName. It is ignored here: the function body is empty.
+*/
+static void Common_NewFile(const char *FileName)
+{
+}
+
+/*!
+Read one entry from a log in the common log format.
+
+The strings stored in \a Entry point inside \a Line. The line buffer is only
+modified, to terminate those strings, once the whole line is known to be valid.
+Entry->EntryTime points to a static buffer that is overwritten by the next call.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line. It may be
+partially filled when the line is rejected.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Unknown The line is invalid.
+\retval RLRC_InternalError An internal error was encountered.
+*/
+static enum ReadLogReturnCodeEnum Common_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+       const char *Begin;
+       int IpLen;
+       int HttpCodeLen;
+       int UrlLen;
+       int UserLen;
+       int Day;
+       char MonthName[4];
+       int MonthNameLen;
+       int Month;
+       int Year;
+       int Hour;
+       int Minute;
+       int Second;
+       static struct tm tt;
+
+       // get IP address
+       Entry->Ip=Line;
+       for (IpLen=0 ; *Line && *Line!=' ' ; IpLen++) Line++;
+       if (*Line!=' ' || IpLen==0) return(RLRC_Unknown);
+
+       if (!squid24) {
+               // squid24 is not set: the user ID is in the second column after the IP, skip the first column here
+               Begin=++Line;
+               while (*Line && *Line!=' ') Line++;
+               if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+       }
+
+       // the ID of the user or - if the user is unidentified
+       Entry->User=++Line;
+       for (UserLen=0 ; *Line && *Line!=' ' ; UserLen++) Line++;
+       if (*Line!=' ' || UserLen==0) return(RLRC_Unknown);
+
+       if (squid24) {
+               // squid24 is set: the user ID was in the first column after the IP, skip the second column here
+               Begin=++Line;
+               while (*Line && *Line!=' ') Line++;
+               if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+       }
+
+       // get the date enclosed within square brackets
+       // (characters are cast to unsigned char as passing a negative value to the ctype functions is undefined)
+       ++Line;
+       if (*Line!='[') return(RLRC_Unknown);
+       ++Line;
+       Day=0;
+       while (isdigit((unsigned char)*Line)) Day=Day*10+(*Line++-'0');
+       if (*Line!='/' || Day<1 || Day>31) return(RLRC_Unknown);
+
+       ++Line;
+       for (MonthNameLen=0 ; MonthNameLen<(int)sizeof(MonthName)-1 && isalpha((unsigned char)*Line) ; MonthNameLen++) MonthName[MonthNameLen]=*Line++;
+       if (*Line!='/') return(RLRC_Unknown);
+       MonthName[MonthNameLen]='\0';
+       Month=month2num(MonthName);
+       // NOTE(review): assumes month2num() returns 12 or more for an unknown month name - confirm
+       if (Month>=12) return(RLRC_Unknown);
+
+       ++Line;
+       Year=0;
+       while (isdigit((unsigned char)*Line)) Year=Year*10+(*Line++-'0');
+       if (*Line!=':' || Year<1900 || Year>2200) return(RLRC_Unknown);
+
+       // get the time
+       ++Line;
+       Hour=0;
+       while (isdigit((unsigned char)*Line)) Hour=Hour*10+(*Line++-'0');
+       if (*Line!=':' || Hour>=24) return(RLRC_Unknown);
+       ++Line;
+       Minute=0;
+       while (isdigit((unsigned char)*Line)) Minute=Minute*10+(*Line++-'0');
+       if (*Line!=':' || Minute>=60) return(RLRC_Unknown);
+       ++Line;
+       Second=0;
+       while (isdigit((unsigned char)*Line)) Second=Second*10+(*Line++-'0');
+       if (*Line!=' ' || Second>60) return(RLRC_Unknown); //second can be 60 due to a leap second
+
+       // skip the timezone up to the closing ]
+       while (*Line && *Line!=']') Line++;
+       if (*Line!=']') return(RLRC_Unknown);
+
+       // only the fields below are written; the others keep the zero value of the static storage
+       tt.tm_year=Year-1900;
+       tt.tm_mon=Month;
+       tt.tm_mday=Day;
+       tt.tm_hour=Hour;
+       tt.tm_min=Minute;
+       tt.tm_sec=Second;
+       Entry->EntryTime=&tt;
+
+       // the request (function, URL and protocol) is enclosed between double quotes
+       ++Line;
+       if (*Line!=' ') return(RLRC_Unknown);
+       ++Line;
+       if (*Line!='\"') return(RLRC_Unknown);
+
+       // skip the HTTP function
+       Begin=++Line;
+       while (isalpha((unsigned char)*Line)) Line++;
+       if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
+
+       // get the URL
+       Entry->Url=++Line;
+       for (UrlLen=0 ; *Line && *Line!=' ' ; UrlLen++) Line++;
+       if (*Line!=' ' || UrlLen==0) return(RLRC_Unknown);
+
+       // skip the HTTP/...
+       ++Line;
+       while (*Line && *Line!='\"') Line++;
+       if (*Line!='\"') return(RLRC_Unknown);
+       ++Line;
+       if (*Line!=' ') return(RLRC_Unknown);
+
+       // get the HTTP code.
+       Entry->HttpCode=++Line;
+       for (HttpCodeLen=0 ; *Line && *Line!=' ' ; HttpCodeLen++) Line++;
+       if (*Line!=' ' || HttpCodeLen==0) return(RLRC_Unknown);
+
+       // get the number of transferred bytes.
+       Begin=++Line;
+       Entry->DataSize=0LL;
+       while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+       if (*Line!=' ' || Begin==Line) return(RLRC_Unknown);
+
+       // it is safe to alter the line buffer now that we are returning a valid entry
+       Entry->Ip[IpLen]='\0';
+       Entry->HttpCode[HttpCodeLen]='\0';
+       Entry->Url[UrlLen]='\0';
+       Entry->User[UserLen]='\0';
+
+       return(RLRC_NoError);
+}
+
+//! \brief Object to read a log in the common log format: format name, new file callback and entry parser.
+const struct ReadLogProcessStruct ReadCommonLog=
+{
+       /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+       N_("common log format"),
+       Common_NewFile,
+       Common_ReadEntry
+};
diff --git a/readlog_extlog.c b/readlog_extlog.c
new file mode 100644 (file)
index 0000000..889a5a4
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
+ *                                                            1998, 2012
+ *
+ * SARG donations:
+ *      please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ *     http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+A new file is being read. The name of the file is \a FileName. It is ignored here: the function body is empty.
+*/
+static void ExtLog_NewFile(const char *FileName)
+{
+}
+
+/*!
+Read one entry from an extended log. Placeholder: nothing is parsed yet and \a Entry is left untouched.
+
+\param Line One line from the input log file (currently unused).
+\param Entry Where to store the information parsed from the line (currently unused).
+
+\retval RLRC_NoError Always returned by the current placeholder, whatever the line contains.
+\retval RLRC_Unknown The line is invalid (not returned by the current placeholder).
+\retval RLRC_InternalError An internal error was encountered (not returned by the current placeholder).
+*/
+static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+       return(RLRC_NoError);
+}
+
+//! \brief Object to read an extended log: format name, new file callback and entry parser.
+const struct ReadLogProcessStruct ReadExtLog=
+{
+       /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+       N_("extended log format"),
+       ExtLog_NewFile,
+       ExtLog_ReadEntry
+};
diff --git a/readlog_sarg.c b/readlog_sarg.c
new file mode 100644 (file)
index 0000000..7eda5b4
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
+ *                                                            1998, 2012
+ *
+ * SARG donations:
+ *      please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ *     http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+//! \c True once the "*** SARG Log ***" header line has been seen in the current file. Reset to \c false for every new file.
+static bool InSargLog=false;
+
+/*!
+Hook run when a new input file is about to be read.
+
+The sarg header seen in a previous file must not carry over, so the
+detection flag is cleared.
+
+\param FileName The name of the file being read. It is not used.
+*/
+static void Sarg_NewFile(const char *FileName)
+{
+       (void)FileName;
+       InSargLog=false;
+}
+
+/*!
+Read a run of decimal digits and move the line pointer past it.
+
+The value is compared with \a Max after every digit, so a long run of
+digits is rejected before the accumulator can overflow.
+
+\param Line The current position in the line. It is moved past the digits
+only if the function succeeds.
+\param Max The greatest value accepted.
+\param Value Where to store the number.
+
+\return \c True if at least one digit was read and the number doesn't
+exceed \a Max.
+*/
+static bool Sarg_ReadNumber(char **Line,int Max,int *Value)
+{
+       char *Ptr=*Line;
+       int Number=0;
+
+       // the cast keeps isdigit() defined for bytes above 0x7F when char is signed
+       if (!isdigit((unsigned char)*Ptr)) return(false);
+       do {
+               Number=Number*10+(*Ptr++-'0');
+               if (Number>Max) return(false);
+       } while (isdigit((unsigned char)*Ptr));
+       *Line=Ptr;
+       *Value=Number;
+       return(true);
+}
+
+/*!
+Read one entry from a sarg generated log.
+
+The fields are separated by tabs and are, in order: the date as
+day/month/year, the time as hour:minute:second, the user, the IP address,
+the URL, the number of bytes, the HTTP code and the elapsed time. One more
+tab must follow the elapsed time.
+
+The strings stored in \a Entry point inside \a Line. The line buffer is
+only modified (to terminate those strings) when the entry is valid.
+Entry->EntryTime points to a static buffer that is overwritten by the
+next valid entry.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line. Some
+fields may have been written even if the line is rejected.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Ignore The line is the header identifying a sarg log.
+\retval RLRC_Unknown The line is invalid or no sarg header was found in this file.
+*/
+static enum ReadLogReturnCodeEnum Sarg_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+       const char *Begin;
+       int IpLen;
+       int HttpCodeLen;
+       int UrlLen;
+       int UserLen;
+       int Day;
+       int Month;
+       int Year;
+       int Hour;
+       int Minute;
+       int Second;
+       static struct tm tt;
+
+       if (strncmp(Line,"*** SARG Log ***",16)==0) {
+               InSargLog=true;
+               return(RLRC_Ignore);
+       }
+       if (!InSargLog) return(RLRC_Unknown);
+
+       // get the date
+       if (!Sarg_ReadNumber(&Line,31,&Day) || *Line!='/' || Day<1) return(RLRC_Unknown);
+       ++Line;
+       if (!Sarg_ReadNumber(&Line,12,&Month) || *Line!='/' || Month<1) return(RLRC_Unknown);
+       ++Line;
+       if (!Sarg_ReadNumber(&Line,2200,&Year) || *Line!='\t' || Year<1900) return(RLRC_Unknown);
+
+       // get the time
+       ++Line;
+       if (!Sarg_ReadNumber(&Line,23,&Hour) || *Line!=':') return(RLRC_Unknown);
+       ++Line;
+       if (!Sarg_ReadNumber(&Line,59,&Minute) || *Line!=':') return(RLRC_Unknown);
+       ++Line;
+       //second can be 60 due to a leap second
+       if (!Sarg_ReadNumber(&Line,60,&Second) || *Line!='\t') return(RLRC_Unknown);
+
+       tt.tm_year=Year-1900;
+       tt.tm_mon=Month-1;
+       tt.tm_mday=Day;
+       tt.tm_hour=Hour;
+       tt.tm_min=Minute;
+       tt.tm_sec=Second;
+       Entry->EntryTime=&tt;
+
+       // the ID of the user
+       Entry->User=++Line;
+       for (UserLen=0 ; *Line && *Line!='\t' ; UserLen++) Line++;
+       if (*Line!='\t' || UserLen==0) return(RLRC_Unknown);
+
+       // get IP address
+       Entry->Ip=++Line;
+       for (IpLen=0 ; *Line && *Line!='\t' ; IpLen++) Line++;
+       if (*Line!='\t' || IpLen==0) return(RLRC_Unknown);
+
+       // get the URL
+       Entry->Url=++Line;
+       for (UrlLen=0 ; *Line && *Line!='\t' ; UrlLen++) Line++;
+       if (*Line!='\t' || UrlLen==0) return(RLRC_Unknown);
+
+       // get the number of transferred bytes.
+       Begin=++Line;
+       Entry->DataSize=0LL;
+       while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+       if (*Line!='\t' || Begin==Line) return(RLRC_Unknown);
+
+       // get the HTTP code.
+       Entry->HttpCode=++Line;
+       for (HttpCodeLen=0 ; *Line && *Line!='\t' ; HttpCodeLen++) Line++;
+       if (*Line!='\t' || HttpCodeLen==0) return(RLRC_Unknown);
+
+       // get the elapsed time.
+       Begin=++Line;
+       Entry->ElapsedTime=0L;
+       while (isdigit((unsigned char)*Line)) Entry->ElapsedTime=Entry->ElapsedTime*10+(*Line++-'0');
+       if (*Line!='\t' || Line==Begin) return(RLRC_Unknown);
+
+       // the smart filter that follows the elapsed time is not read
+
+       // it is safe to alter the line buffer now that we are returning a valid entry
+       Entry->Ip[IpLen]='\0';
+       Entry->HttpCode[HttpCodeLen]='\0';
+       Entry->Url[UrlLen]='\0';
+       Entry->User[UserLen]='\0';
+
+       return(RLRC_NoError);
+}
+
+//! \brief Object to read a sarg generated log: the displayed name of the format followed by its two handlers.
+const struct ReadLogProcessStruct ReadSargLog=
+{
+       /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+       N_("sarg log format"),
+       Sarg_NewFile, // handler for the start of a new input file
+       Sarg_ReadEntry // handler for one line of the log
+};
diff --git a/readlog_squid.c b/readlog_squid.c
new file mode 100644 (file)
index 0000000..a83ecd6
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
+ *                                                            1998, 2012
+ *
+ * SARG donations:
+ *      please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ *     http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+Hook run when a new input file is about to be read.
+
+This reader keeps no state between files, so there is nothing to do.
+
+\param FileName The name of the file being read. It is not used.
+*/
+static void Squid_NewFile(const char *FileName)
+{
+       (void)FileName;
+}
+
+/*!
+Read one entry from a standard squid log format.
+
+The fields are separated by spaces and are, in order: the timestamp in
+seconds with a decimal part, the elapsed time, the IP address, the HTTP
+code, the number of bytes, the HTTP function, the URL and the user. One
+more space must follow the user.
+
+The strings stored in \a Entry point inside \a Line. The line buffer is
+only modified (to terminate those strings) when the entry is valid.
+Entry->EntryTime is the pointer returned by localtime() and is therefore
+overwritten by the next call to that function.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line. Some
+fields may have been written even if the line is rejected.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Unknown The line is invalid.
+\retval RLRC_InternalError The line has the right layout but its timestamp cannot be converted.
+*/
+static enum ReadLogReturnCodeEnum Squid_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+       const char *Begin;
+       long long Timestamp;
+       time_t log_time;
+       int TimeDigits;
+       int IpLen;
+       int HttpCodeLen;
+       int UrlLen;
+       int UserLen;
+
+       // get log time. No more than 18 digits are accepted to keep the value within a long long.
+       // The casts keep isdigit() defined for bytes above 0x7F when char is signed.
+       Timestamp=0LL;
+       for (TimeDigits=0 ; isdigit((unsigned char)*Line) ; TimeDigits++) {
+               if (TimeDigits>=18) return(RLRC_Unknown);
+               Timestamp=Timestamp*10+(*Line++-'0');
+       }
+       if (*Line!='.' || TimeDigits==0) return(RLRC_Unknown);
+       log_time=(time_t)Timestamp;
+
+       // ignore decimal part to log time.
+       Begin=++Line;
+       while (isdigit((unsigned char)*Line)) Line++;
+       if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
+
+       // skip spaces before the elapsed time.
+       while (*Line==' ') Line++;
+       if (!isdigit((unsigned char)*Line)) return(RLRC_Unknown);
+
+       // get the elapsed time. At least one digit is present thanks to the test above.
+       Entry->ElapsedTime=0L;
+       while (isdigit((unsigned char)*Line)) Entry->ElapsedTime=Entry->ElapsedTime*10+(*Line++-'0');
+       if (*Line!=' ') return(RLRC_Unknown);
+
+       // get IP address. It can be a fqdn if that option is enabled in squid.
+       Entry->Ip=++Line;
+       for (IpLen=0 ; *Line && *Line!=' ' ; IpLen++) Line++;
+       if (*Line!=' ' || IpLen==0) return(RLRC_Unknown);
+
+       // get the HTTP code.
+       Entry->HttpCode=++Line;
+       for (HttpCodeLen=0 ; *Line && *Line!=' ' ; HttpCodeLen++) Line++;
+       if (*Line!=' ' || HttpCodeLen==0) return(RLRC_Unknown);
+
+       // get the number of transferred bytes.
+       Begin=++Line;
+       Entry->DataSize=0LL;
+       while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+       if (*Line!=' ' || Begin==Line) return(RLRC_Unknown);
+
+       // skip the HTTP function
+       Begin=++Line;
+       while (*Line && *Line!=' ') Line++;
+       if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+
+       // the url
+       Entry->Url=++Line;
+       for (UrlLen=0 ; *Line && *Line!=' ' ; UrlLen++) Line++;
+       if (*Line!=' ' || UrlLen==0) return(RLRC_Unknown);
+
+       // the ID of the user or - if the user is unidentified
+       Entry->User=++Line;
+       for (UserLen=0 ; *Line && *Line!=' ' ; UserLen++) Line++;
+       if (*Line!=' ' || UserLen==0) return(RLRC_Unknown);
+
+       // now, the format is known with a good confidence. If the time doesn't decode, it is an error.
+       Entry->EntryTime=localtime(&log_time);
+       if (Entry->EntryTime == NULL) {
+               debuga(_("Cannot convert the timestamp from the squid log file\n"));
+               return(RLRC_InternalError);
+       }
+
+       // it is safe to alter the line buffer now that we are returning a valid entry
+       Entry->Ip[IpLen]='\0';
+       Entry->HttpCode[HttpCodeLen]='\0';
+       Entry->Url[UrlLen]='\0';
+       Entry->User[UserLen]='\0';
+
+       return(RLRC_NoError);
+}
+
+//! \brief Object to read a standard squid log format: the displayed name of the format followed by its two handlers.
+const struct ReadLogProcessStruct ReadSquidLog=
+{
+       /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+       N_("squid log format"),
+       Squid_NewFile, // handler for the start of a new input file
+       Squid_ReadEntry // handler for one line of the log
+};