From 1c91da0774c8fdde9768e48a9835b552af42f57d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Fr=C3=A9d=C3=A9ric=20Marchal?= Date: Sat, 25 Aug 2012 08:45:20 +0200 Subject: [PATCH] Modular reading of some input log formats Currently supported log formats are squid, common and sarg. --- CMakeLists.txt | 3 +- Makefile.in | 3 +- include/readlog.h | 25 +++++- readlog.c | 212 +++++++++++++++++++++++++++++----------------- readlog_common.c | 184 ++++++++++++++++++++++++++++++++++++++++ readlog_extlog.c | 59 +++++++++++++ readlog_sarg.c | 160 ++++++++++++++++++++++++++++++++++ readlog_squid.c | 131 ++++++++++++++++++++++++++++ 8 files changed, 693 insertions(+), 84 deletions(-) create mode 100644 readlog_common.c create mode 100644 readlog_extlog.c create mode 100644 readlog_sarg.c create mode 100644 readlog_squid.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a9ea2d9..0e3d39e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,8 @@ SET(SRC util.c log.c report.c topuser.c email.c sort.c html.c smartfilter.c denied.c authfail.c charset.c dichotomic.c redirector.c auth.c download.c grepday.c ip2name_exec.c dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c - usertab.c userinfo.c longline.c url.c readlog.c) + usertab.c userinfo.c longline.c url.c readlog.c readlog_squid.c + readlog_sarg.c readlog_extlog.c readlog_common.c) FOREACH(f ${SRC}) ADD_FILE_DEPENDENCIES(${f} ${CMAKE_BINARY_DIR}/config.h ${CMAKE_SOURCE_DIR}/include/conf.h ${CMAKE_SOURCE_DIR}/include/info.h ${CMAKE_SOURCE_DIR}/include/defs.h) diff --git a/Makefile.in b/Makefile.in index 3bbf0b5..b6a7b1b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -38,7 +38,8 @@ SRCS = util.c log.c report.c topuser.c email.c sort.c html.c \ smartfilter.c denied.c authfail.c charset.c dichotomic.c \ redirector.c auth.c download.c grepday.c ip2name_exec.c \ dansguardian_log.c dansguardian_report.c realtime.c btree_cache.c \ - usertab.c userinfo.c longline.c url.c readlog.c + usertab.c userinfo.c longline.c url.c 
readlog.c readlog_squid.c \ + readlog_sarg.c readlog_extlog.c readlog_common.c OBJS = $(SRCS:.c=.o) diff --git a/include/readlog.h b/include/readlog.h index 2909a49..f895064 100644 --- a/include/readlog.h +++ b/include/readlog.h @@ -8,8 +8,12 @@ enum ReadLogReturnCodeEnum { //! Line successfuly read. RLRC_NoError, + //! Line is known and should be ignored. + RLRC_Ignore, //! Unknown line format. RLRC_Unknown, + //! Error encountered during the parsing of the file. + RLRC_InternalError, RLRC_LastRetCode //!< last entry of the list. }; @@ -21,11 +25,15 @@ struct ReadLogStruct { //! The time corresponding to the entry. struct tm *EntryTime; - //! The IP address connecting to internet + //! The IP address connecting to internet. char *Ip; //! The user's name. char *User; - //! The URL of the visited site. + /*! + The URL of the visited site. + + The pointer may be NULL if the URL doesn't exist in the log file. + */ char *Url; //! Time necessary to process the user's request. long int ElapsedTime; @@ -35,4 +43,17 @@ struct ReadLogStruct char *HttpCode; }; +/*! +\brief Functions to read a log file. +*/ +struct ReadLogProcessStruct +{ + //! The name of the log format processed by this object. + const char *Name; + //! Inform the module about the reading of a new file. + void (*NewFile)(const char *FileName); + //! Function to read one entry from the log. + enum ReadLogReturnCodeEnum (*ReadEntry)(char *Line,struct ReadLogStruct *Entry); +}; + #endif //READLOG_HEADER diff --git a/readlog.c b/readlog.c index cdc0a31..1de337d 100644 --- a/readlog.c +++ b/readlog.c @@ -43,6 +43,20 @@ numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 extern char *userfile; +extern const struct ReadLogProcessStruct ReadSquidLog; +extern const struct ReadLogProcessStruct ReadCommonLog; +extern const struct ReadLogProcessStruct ReadSargLog; +extern const struct ReadLogProcessStruct ReadExtLog; + +//! The list of the supported log formats. 
+static const struct ReadLogProcessStruct * const LogFormats[]= //read-only table of read-only descriptors; detection tries them in this order +{ + &ReadSquidLog, + &ReadCommonLog, + &ReadSargLog, + &ReadExtLog +}; + /*! Read the log files. @@ -53,39 +67,17 @@ Read the log files. */ int ReadLogFile(struct ReadLogDataStruct *Filter) { - enum isa_col_id { - ISACOL_Ip, - ISACOL_UserName, - ISACOL_Date, - ISACOL_Time, - ISACOL_TimeTaken, - ISACOL_Bytes, - ISACOL_Uri, - ISACOL_Status, - ISACOL_Last //last entry of the list ! - }; - enum InputLogFormat { - ILF_Unknown, - ILF_Squid, - ILF_Common, - ILF_Sarg, - ILF_Isa, - ILF_Last //last entry of the list ! - }; - - enum InputLogFormat ilf; - int ilf_count[ILF_Last]; longline line; char *linebuf; char *str; char arq_log[255]; - char fun[MAXLEN]; - char elap[255]; + //char fun[MAXLEN]; + //char elap[255]; char user[MAX_USER_LEN]; - char data[255]; - char ip[60]; + //char data[255]; + //char ip[60]; char hora[30]; - char mes[30]; + //char mes[30]; char tbuf2[128]; char dia[128]=""; char wuser[MAXLEN]; @@ -100,7 +92,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) int blen; int OutputNonZero = REPORT_EVERY_X_LINES ; int idata=0; - int isa_ncols=0,isa_cols[ISACOL_Last]; + //int isa_ncols=0,isa_cols[ISACOL_Last]; int x; int hmr; int nopen; @@ -108,12 +100,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) int mindate=0; int maxdate=0; int cstatus; + int current_format_idx; + int format_count[sizeof(LogFormats)/sizeof(*LogFormats)]; unsigned long int recs1=0UL; unsigned long int recs2=0UL; long int totregsl=0; long int totregsg=0; long int totregsx=0; - long long int iyear, imonth, iday; + //long long int iyear, imonth, iday; FILE *fp_in=NULL; FILE *fp_log=NULL; FILE *fp_Download_Unsort=NULL; @@ -122,18 +116,20 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) bool download_flag=false; bool id_is_ip; bool totper=false; + enum ReadLogReturnCodeEnum log_entry_status; struct stat logstat; struct getwordstruct gwarea; - struct tm tt; + //struct tm tt; struct userfilestruct *prev_ufile; struct 
userinfostruct *uinfo; struct userfilestruct *first_user_file=NULL; struct userfilestruct *ufile; struct userfilestruct *ufile1; struct ReadLogStruct log_entry; - time_t tnum; + const struct ReadLogProcessStruct *current_format=NULL; + //time_t tnum; - for (ilf=0 ; ilfNewFile) + LogFormats[x]->NewFile(arq); + + if (arq[0]=='-' && arq[1]=='\0') { if(debug) debuga(_("Reading access log file: from stdin\n")); fp_in=stdin; @@ -179,7 +181,6 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) from_stdin=false; } - ilf=ILF_Unknown; download_flag=false; recs1=0UL; @@ -215,11 +216,13 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) while ((linebuf=longline_read(fp_in,line))!=NULL) { blen=strlen(linebuf); +#if 0 if (ilf==ILF_Unknown) { if(strncmp(linebuf,"#Software: Mic",14) == 0) { - fixendofline(linebuf); - if (debug) + if (debug) { + fixendofline(linebuf); debuga(_("Log is from Microsoft ISA: %s\n"),linebuf); + } ilf=ILF_Isa; ilf_count[ilf]++; continue; @@ -235,21 +238,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) continue; } } - - if(!fp_log && ParsedOutputLog[0] && ilf!=ILF_Sarg) { - if(access(ParsedOutputLog,R_OK) != 0) { - my_mkdir(ParsedOutputLog); - } - if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) { - debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); - exit(EXIT_FAILURE); - } - if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { - debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno)); - exit(EXIT_FAILURE); - } - fputs("*** SARG Log ***\n",fp_log); - } +#endif recs2++; if (ShowReadStatistics && --OutputNonZero<=0) { @@ -263,9 +252,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) fflush (stdout); OutputNonZero = REPORT_EVERY_X_LINES ; } - if(blen < 58) continue; - if(strstr(linebuf,"HTTP/0.0") != 0) continue; - if(strstr(linebuf,"logfile turned over") != 0) continue; + + /* + The following checks are retained here as I don't know to + what format they apply. 
They date back to pre 2.4 versions. + */ + //if(blen < 58) continue; //this test conflict with the reading of the sarg log header line + if(strstr(linebuf,"HTTP/0.0") != 0) continue;//recorded by squid when encountering an incomplete query + if(strstr(linebuf,"logfile turned over") != 0) continue;//reported by newsyslog if(linebuf[0] == ' ') continue; // exclude_string @@ -291,7 +285,62 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) if(debugm) printf("BUF=%s\n",linebuf); + // process the line + log_entry_status=RLRC_Unknown; memset(&log_entry,0,sizeof(log_entry)); + if (current_format) { + log_entry_status=current_format->ReadEntry(linebuf,&log_entry); + } + + // find out what line format to use + if (log_entry_status==RLRC_Unknown) { + // try every other format in turn; x is always a valid index inside the body and equals the table size only when nothing matched + for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++) { + if (LogFormats[x]==current_format) continue; + memset(&log_entry,0,sizeof(log_entry)); + log_entry_status=LogFormats[x]->ReadEntry(linebuf,&log_entry); + if (log_entry_status!=RLRC_Unknown) break; + } + if (x<0 || x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats))) { + debuga(_("Unknown line format found in input log file %s\n"),arq); + exit(EXIT_FAILURE); + } + current_format=LogFormats[x]; + current_format_idx=x; + if (debugz) { + /* TRANSLATORS: The argument is the log format name as translated by you. 
*/ + debuga(_("Log format identified as \"%s\" for %s\n"),_(current_format->Name),arq); + } + } + if (log_entry_status==RLRC_Ignore) { + continue; + } + if (current_format_idx<0 || current_format==NULL) { + debuga(_("Sarg couldn't determine the format of the input log file %s\n"),arq); + exit(EXIT_FAILURE); + } + if (log_entry_status==RLRC_InternalError) { + debuga(_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),arq); + exit(EXIT_FAILURE); + } + format_count[current_format_idx]++; + + if (!fp_log && ParsedOutputLog[0] && current_format!=&ReadSargLog) { + if(access(ParsedOutputLog,R_OK) != 0) { + my_mkdir(ParsedOutputLog); + } + if (snprintf(arq_log,sizeof(arq_log),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(arq_log)) { + debuga(_("File name too long: %s/sarg_temp.log\n"),ParsedOutputLog); + exit(EXIT_FAILURE); + } + if((fp_log=MY_FOPEN(arq_log,"w"))==NULL) { + debuga(_("(log) Cannot open log file: %s - %s\n"),arq_log,strerror(errno)); + exit(EXIT_FAILURE); + } + fputs("*** SARG Log ***\n",fp_log); + } + +#if 0 if (ilf==ILF_Squid || ilf==ILF_Common || ilf==ILF_Unknown) { getword_start(&gwarea,linebuf); if (getword(data,sizeof(data),&gwarea,' ')<0) { @@ -619,6 +668,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) } log_entry.EntryTime=&tt; } +#endif if (log_entry.EntryTime==NULL) { debuga(_("Unknown input log file format: no time\n")); break; @@ -679,7 +729,7 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) for (str=log_entry.HttpCode ; *str ; str++) if (*str=='\t') *str=' '; - if(ilf!=ILF_Sarg) { + if (current_format!=&ReadSargLog) { /* The full URL is not saved in sarg log. There is no point in testing the URL to detect a downloaded file. 
@@ -757,8 +807,8 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) } } - if(strcmp(log_entry.User,"-") ==0 || strcmp(log_entry.User," ") ==0 || - strcmp(log_entry.User,"") ==0 || strcmp(log_entry.User,":") ==0) + if (log_entry.User[0]=='\0' || (log_entry.User[1]=='\0' && (log_entry.User[0]=='-' || + log_entry.User[0]==' ' || log_entry.User[0]==':'))) continue; if (log_entry.DataSize<0) log_entry.DataSize=0; @@ -798,10 +848,10 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) first_user_file=ufile; } } - #ifdef ENABLE_DOUBLE_CHECK_DATA +#ifdef ENABLE_DOUBLE_CHECK_DATA ufile->user->nbytes+=log_entry.DataSize; ufile->user->elap+=log_entry.ElapsedTime; - #endif +#endif if (ufile->file==NULL) { if (nopen>=maxopenfiles) { @@ -832,13 +882,18 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) strftime(dia, sizeof(dia), "%d/%m/%Y", log_entry.EntryTime); strftime(hora,sizeof(hora),"%H:%M:%S",log_entry.EntryTime); - if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) { + if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, + log_entry.Ip,url,(uint64_t)log_entry.DataSize, + log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) { debuga(_("Write error in the log file of user %s\n"),log_entry.User); exit(EXIT_FAILURE); } - if(fp_log && ilf!=ILF_Sarg) - fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,log_entry.HttpCode,log_entry.ElapsedTime,smartfilter); + if (fp_log && current_format!=&ReadSargLog) { + fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora, + log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize, + log_entry.HttpCode,log_entry.ElapsedTime,smartfilter); + } totregsg++; @@ -851,13 +906,14 @@ int ReadLogFile(struct ReadLogDataStruct *Filter) exit (1); } } - 
fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora,log_entry.User,log_entry.Ip,download_url); + fprintf(fp_Download_Unsort,"%s\t%s\t%s\t%s\t%s\n",dia,hora, + log_entry.User,log_entry.Ip,download_url); } denied_write(&log_entry); authfail_write(&log_entry); - if (ilf!=ILF_Sarg) { + if (current_format!=&ReadSargLog) { if(!totper || idata0 && ilf_count[ILF_Squid]>0) - debuga(_("Log with mixed records format (squid and common log)\n")); - - if(ilf_count[ILF_Common]>0 && ilf_count[ILF_Squid]==0) - debuga(_("Common log format\n")); - - if(ilf_count[ILF_Common]==0 && ilf_count[ILF_Squid]>0) - debuga(_("Squid log format\n")); - - if(ilf_count[ILF_Sarg]>0) - debuga(_("Sarg log format\n")); + for (x=0 ; x0) { + /* TRANSLATORS: It displays the number of lines found in the input log files + * for each supported log format. The log format name is the %s and is a string + * you translate somewhere else. */ + debuga(_("%s: %d entries\n"),_(LogFormats[x]->Name),format_count[x]); + totalcount+=format_count[x]; + } + } - if(totalcount==0 && totregsg) + if (totalcount==0 && totregsg) debuga(_("Log with invalid format\n")); } - if(debugz){ + if(debugz) { debugaz(_("date=%s\n"),dia); debugaz(_("period=%s\n"),period.text); } diff --git a/readlog_common.c b/readlog_common.c new file mode 100644 index 0000000..2a53130 --- /dev/null +++ b/readlog_common.c @@ -0,0 +1,184 @@ +/* + * SARG Squid Analysis Report Generator http://sarg.sourceforge.net + * 1998, 2012 + * + * SARG donations: + * please look at http://sarg.sourceforge.net/donations.php + * Support: + * http://sourceforge.net/projects/sarg/forums/forum/363374 + * --------------------------------------------------------------------- + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+A new file is being read. The name of the file is \a FileName.
+*/
+static void Common_NewFile(const char *FileName)
+{
+}
+
+/*!
+Read one entry from a common log format.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Unknown The line is invalid.
+\retval RLRC_InternalError An internal error was encountered.
+*/
+static enum ReadLogReturnCodeEnum Common_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+	const char *Begin;
+	int IpLen;
+	int HttpCodeLen;
+	int UrlLen;
+	int UserLen;
+	int Day;
+	char MonthName[4];
+	int MonthNameLen;
+	int Month;
+	int Year;
+	int Hour;
+	int Minute;
+	int Second;
+	static struct tm tt; //static: Entry->EntryTime keeps pointing here after return and is overwritten by the next call
+
+	// get IP address
+	Entry->Ip=Line;
+	for (IpLen=0 ; *Line && *Line!=' ' ; IpLen++) Line++;
+	if (*Line!=' ' || IpLen==0) return(RLRC_Unknown);
+
+	if (!squid24) {
+		// squid version <= 2.4 store the user ID in the second column: skip the first column here
+		Begin=++Line;
+		while (*Line && *Line!=' ') Line++;
+		if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+	}
+
+	// the ID of the user or - if the user is unidentified
+	Entry->User=++Line;
+	for (UserLen=0 ; *Line && *Line!=' ' ; UserLen++) Line++;
+	if (*Line!=' ' || UserLen==0) return(RLRC_Unknown);
+
+	if (squid24) {
+		// squid version > 2.4 store the user ID in the first column: skip the second column here
+		Begin=++Line;
+		while (*Line && *Line!=' ') Line++;
+		if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+	}
+
+	// get the date enclosed within square brackets
+	++Line;
+	if (*Line!='[') return(RLRC_Unknown);
+	++Line;
+	Day=0;
+	while (isdigit((unsigned char)*Line)) Day=Day*10+(*Line++-'0'); //cast: the ctype functions are undefined for negative char values
+	if (*Line!='/' || Day<1 || Day>31) return(RLRC_Unknown);
+
+	++Line;
+	for (MonthNameLen=0 ; MonthNameLen<(int)sizeof(MonthName)-1 && isalpha((unsigned char)*Line) ; MonthNameLen++) MonthName[MonthNameLen]=*Line++;
+	if (*Line!='/') return(RLRC_Unknown);
+	MonthName[MonthNameLen]='\0';
+	Month=month2num(MonthName); //NOTE(review): month2num() is assumed to be the project's month lookup returning 0-11, or 12 when unknown - confirm
+	if (Month>=12) return(RLRC_Unknown);
+
+	++Line;
+	Year=0;
+	while (isdigit((unsigned char)*Line)) Year=Year*10+(*Line++-'0');
+	if (*Line!=':' || Year<1900 || Year>2200) return(RLRC_Unknown);
+
+	// get the time
+	++Line;
+	Hour=0;
+	while (isdigit((unsigned char)*Line)) Hour=Hour*10+(*Line++-'0');
+	if (*Line!=':' || Hour>=24) return(RLRC_Unknown);
+	++Line;
+	Minute=0;
+	while (isdigit((unsigned char)*Line)) Minute=Minute*10+(*Line++-'0');
+	if (*Line!=':' || Minute>=60) return(RLRC_Unknown);
+	++Line;
+	Second=0;
+	while (isdigit((unsigned char)*Line)) Second=Second*10+(*Line++-'0');
+	if (*Line!=' ' || Second>60) return(RLRC_Unknown); //second can be 60 due to a leap second
+
+	// skip the timezone up to the closing ]
+	while (*Line && *Line!=']') Line++;
+	if (*Line!=']') return(RLRC_Unknown);
+
+	tt.tm_year=Year-1900;
+	tt.tm_mon=Month;
+	tt.tm_mday=Day;
+	tt.tm_hour=Hour;
+	tt.tm_min=Minute;
+	tt.tm_sec=Second;
+	Entry->EntryTime=&tt;
+
+	// the URL is enclosed between double quotes
+	++Line;
+	if (*Line!=' ') return(RLRC_Unknown);
+	++Line;
+	if (*Line!='\"') return(RLRC_Unknown);
+
+	// skip the HTTP function
+	Begin=++Line;
+	while (isalpha((unsigned char)*Line)) Line++;
+	if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
+
+	// get the URL
+	Entry->Url=++Line;
+	for (UrlLen=0 ; *Line && *Line!=' ' ; UrlLen++) Line++;
+	if (*Line!=' ' || UrlLen==0) return(RLRC_Unknown);
+
+	// skip the HTTP/...
+	++Line;
+	while (*Line && *Line!='\"') Line++;
+	if (*Line!='\"') return(RLRC_Unknown);
+	++Line;
+	if (*Line!=' ') return(RLRC_Unknown);
+
+	// get the HTTP code.
+	Entry->HttpCode=++Line;
+	for (HttpCodeLen=0 ; *Line && *Line!=' ' ; HttpCodeLen++) Line++;
+	if (*Line!=' ' || HttpCodeLen==0) return(RLRC_Unknown);
+
+	// get the number of transferred bytes.
+	Begin=++Line;
+	Entry->DataSize=0LL;
+	while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+	if (*Line!=' ' || Begin==Line) return(RLRC_Unknown);
+
+	// it is safe to alter the line buffer now that we are returning a valid entry
+	Entry->Ip[IpLen]='\0';
+	Entry->HttpCode[HttpCodeLen]='\0';
+	Entry->Url[UrlLen]='\0';
+	Entry->User[UserLen]='\0';
+
+	return(RLRC_NoError);
+}
+
+//! \brief Object to read a standard common log format.
+const struct ReadLogProcessStruct ReadCommonLog=
+{
+	/* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+	N_("common log format"),
+	Common_NewFile,
+	Common_ReadEntry
+};
diff --git a/readlog_extlog.c b/readlog_extlog.c
new file mode 100644
index 0000000..889a5a4
--- /dev/null
+++ b/readlog_extlog.c
@@ -0,0 +1,59 @@
+/*
+ * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
+ * 1998, 2012
+ *
+ * SARG donations:
+ * please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ * http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. + * + */ + +#include "include/conf.h" +#include "include/defs.h" + +/*! +A new file is being read. The name of the file is \a FileName. Nothing is done with it: the body is empty. +*/ +static void ExtLog_NewFile(const char *FileName) +{ +} + +/*! +Read one entry from an extended log. Placeholder: the body parses nothing, leaves \a Entry untouched and always returns RLRC_NoError. + +\param Line One line from the input log file. +\param Entry Where to store the information parsed from the line. + +\retval RLRC_NoError One valid entry is parsed. +\retval RLRC_Unknown The line is invalid. +\retval RLRC_InternalError An internal error was encountered. +*/ +static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStruct *Entry) +{ + return(RLRC_NoError); +} + +//! \brief Object to read an extended log. +const struct ReadLogProcessStruct ReadExtLog= +{ + /* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */ + N_("extended log format"), + ExtLog_NewFile, + ExtLog_ReadEntry +}; diff --git a/readlog_sarg.c b/readlog_sarg.c new file mode 100644 index 0000000..7eda5b4 --- /dev/null +++ b/readlog_sarg.c @@ -0,0 +1,160 @@ +/* + * SARG Squid Analysis Report Generator http://sarg.sourceforge.net + * 1998, 2012 + * + * SARG donations: + * please look at http://sarg.sourceforge.net/donations.php + * Support: + * http://sourceforge.net/projects/sarg/forums/forum/363374 + * --------------------------------------------------------------------- + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+//! \c True if the current log is known to be a sarg parsed log.
+static bool InSargLog=false;
+
+/*!
+A new file is being read. The name of the file is \a FileName.
+*/
+static void Sarg_NewFile(const char *FileName)
+{
+	InSargLog=false;
+}
+
+/*!
+Read one entry from a sarg generated log.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Unknown The line is invalid or no sarg header line was seen since the last new file.
+\retval RLRC_Ignore The line is the sarg log header and carries no entry.
+*/
+static enum ReadLogReturnCodeEnum Sarg_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+	const char *Begin;
+	int IpLen;
+	int HttpCodeLen;
+	int UrlLen;
+	int UserLen;
+	int Day;
+	int Month;
+	int Year;
+	int Hour;
+	int Minute;
+	int Second;
+	static struct tm tt; //static: Entry->EntryTime keeps pointing here after return and is overwritten by the next call
+
+	if (strncmp(Line,"*** SARG Log ***",16)==0) {
+		InSargLog=true;
+		return(RLRC_Ignore);
+	}
+	if (!InSargLog) return(RLRC_Unknown);
+
+	// get the date
+	Day=0;
+	while (isdigit((unsigned char)*Line)) Day=Day*10+(*Line++-'0'); //cast: the ctype functions are undefined for negative char values
+	if (*Line!='/' || Day<1 || Day>31) return(RLRC_Unknown);
+
+	++Line;
+	Month=0;
+	while (isdigit((unsigned char)*Line)) Month=Month*10+(*Line++-'0');
+	if (*Line!='/') return(RLRC_Unknown);
+	if (Month<=0 || Month>12) return(RLRC_Unknown);
+
+	++Line;
+	Year=0;
+	while (isdigit((unsigned char)*Line)) Year=Year*10+(*Line++-'0');
+	if (*Line!='\t' || Year<1900 || Year>2200) return(RLRC_Unknown);
+
+	// get the time
+	++Line;
+	Hour=0;
+	while (isdigit((unsigned char)*Line)) Hour=Hour*10+(*Line++-'0');
+	if (*Line!=':' || Hour>=24) return(RLRC_Unknown);
+	++Line;
+	Minute=0;
+	while (isdigit((unsigned char)*Line)) Minute=Minute*10+(*Line++-'0');
+	if (*Line!=':' || Minute>=60) return(RLRC_Unknown);
+	++Line;
+	Second=0;
+	while (isdigit((unsigned char)*Line)) Second=Second*10+(*Line++-'0');
+	if (*Line!='\t' || Second>60) return(RLRC_Unknown); //second can be 60 due to a leap second
+
+	tt.tm_year=Year-1900;
+	tt.tm_mon=Month-1;
+	tt.tm_mday=Day;
+	tt.tm_hour=Hour;
+	tt.tm_min=Minute;
+	tt.tm_sec=Second;
+	Entry->EntryTime=&tt;
+
+	// the ID of the user
+	Entry->User=++Line;
+	for (UserLen=0 ; *Line && *Line!='\t' ; UserLen++) Line++;
+	if (*Line!='\t' || UserLen==0) return(RLRC_Unknown);
+
+	// get IP address
+	Entry->Ip=++Line;
+	for (IpLen=0 ; *Line && *Line!='\t' ; IpLen++) Line++;
+	if (*Line!='\t' || IpLen==0) return(RLRC_Unknown);
+
+	// get the URL
+	Entry->Url=++Line;
+	for (UrlLen=0 ; *Line && *Line!='\t' ; UrlLen++) Line++;
+	if (*Line!='\t' || UrlLen==0) return(RLRC_Unknown);
+
+	// get the number of transferred bytes.
+	Begin=++Line;
+	Entry->DataSize=0LL;
+	while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+	if (*Line!='\t' || Begin==Line) return(RLRC_Unknown);
+
+	// get the HTTP code.
+	Entry->HttpCode=++Line;
+	for (HttpCodeLen=0 ; *Line && *Line!='\t' ; HttpCodeLen++) Line++;
+	if (*Line!='\t' || HttpCodeLen==0) return(RLRC_Unknown);
+
+	// get the elapsed time.
+	Begin=++Line;
+	Entry->ElapsedTime=0L;
+	while (isdigit((unsigned char)*Line)) Entry->ElapsedTime=Entry->ElapsedTime*10+(*Line++-'0');
+	if (*Line!='\t' || Line==Begin) return(RLRC_Unknown);
+
+	// the smart filter column that follows is not parsed
+
+	// it is safe to alter the line buffer now that we are returning a valid entry
+	Entry->Ip[IpLen]='\0';
+	Entry->HttpCode[HttpCodeLen]='\0';
+	Entry->Url[UrlLen]='\0';
+	Entry->User[UserLen]='\0';
+
+	return(RLRC_NoError);
+}
+
+//! \brief Object to read a sarg generated log format.
+const struct ReadLogProcessStruct ReadSargLog=
+{
+	/* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+	N_("sarg log format"),
+	Sarg_NewFile,
+	Sarg_ReadEntry
+};
diff --git a/readlog_squid.c b/readlog_squid.c
new file mode 100644
index 0000000..a83ecd6
--- /dev/null
+++ b/readlog_squid.c
@@ -0,0 +1,131 @@
+/*
+ * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
+ * 1998, 2012
+ *
+ * SARG donations:
+ * please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ * http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+A new file is being read. The name of the file is \a FileName.
+*/
+static void Squid_NewFile(const char *FileName)
+{
+}
+
+/*!
+Read one entry from a standard squid log format.
+
+\param Line One line from the input log file.
+\param Entry Where to store the information parsed from the line.
+
+\retval RLRC_NoError One valid entry is parsed.
+\retval RLRC_Unknown The line is invalid.
+\retval RLRC_InternalError An internal error was encountered.
+*/
+static enum ReadLogReturnCodeEnum Squid_ReadEntry(char *Line,struct ReadLogStruct *Entry)
+{
+	const char *Begin;
+	time_t log_time;
+	int IpLen;
+	int HttpCodeLen;
+	int UrlLen;
+	int UserLen;
+
+	// get log time.
+	Begin=Line;
+	log_time=0;
+	while (isdigit((unsigned char)*Line)) log_time=log_time*10+(*Line++-'0'); //cast: the ctype functions are undefined for negative char values
+	if (*Line!='.' || Line==Begin) return(RLRC_Unknown);
+
+	// ignore decimal part to log time.
+	Begin=++Line;
+	while (isdigit((unsigned char)*Line)) Line++;
+	if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
+
+	// skip spaces before the elapsed time.
+	while (*Line==' ') Line++;
+	if (!isdigit((unsigned char)*Line)) return(RLRC_Unknown);
+
+	// get the elapsed time.
+	Begin=Line;
+	Entry->ElapsedTime=0L;
+	while (isdigit((unsigned char)*Line)) Entry->ElapsedTime=Entry->ElapsedTime*10+(*Line++-'0');
+	if (*Line!=' ' || Line==Begin) return(RLRC_Unknown);
+
+	// get IP address. It can be a fqdn if that option is enabled in squid.
+	Entry->Ip=++Line;
+	for (IpLen=0 ; *Line && *Line!=' ' ; IpLen++) Line++;
+	if (*Line!=' ' || IpLen==0) return(RLRC_Unknown);
+
+	// get the HTTP code.
+	Entry->HttpCode=++Line;
+	for (HttpCodeLen=0 ; *Line && *Line!=' ' ; HttpCodeLen++) Line++;
+	if (*Line!=' ' || HttpCodeLen==0) return(RLRC_Unknown);
+
+	// get the number of transferred bytes.
+	Begin=++Line;
+	Entry->DataSize=0LL;
+	while (isdigit((unsigned char)*Line)) Entry->DataSize=Entry->DataSize*10+(*Line++-'0');
+	if (*Line!=' ' || Begin==Line) return(RLRC_Unknown);
+
+	// skip the HTTP function
+	Begin=++Line;
+	while (*Line && *Line!=' ') Line++;
+	if (*Line!=' '|| Line==Begin) return(RLRC_Unknown);
+
+	// the url
+	Entry->Url=++Line;
+	for (UrlLen=0 ; *Line && *Line!=' ' ; UrlLen++) Line++;
+	if (*Line!=' ' || UrlLen==0) return(RLRC_Unknown);
+
+	// the ID of the user or - if the user is unidentified
+	Entry->User=++Line;
+	for (UserLen=0 ; *Line && *Line!=' ' ; UserLen++) Line++;
+	if (*Line!=' ' || UserLen==0) return(RLRC_Unknown);
+
+	// now, the format is known with a good confidence. If the time doesn't decode, it is an error.
+	Entry->EntryTime=localtime(&log_time);
+	if (Entry->EntryTime == NULL) {
+		debuga(_("Cannot convert the timestamp from the squid log file\n"));
+		return(RLRC_InternalError);
+	}
+
+	// it is safe to alter the line buffer now that we are returning a valid entry
+	Entry->Ip[IpLen]='\0';
+	Entry->HttpCode[HttpCodeLen]='\0';
+	Entry->Url[UrlLen]='\0';
+	Entry->User[UserLen]='\0';
+
+	return(RLRC_NoError);
+}
+
+//! \brief Object to read a standard squid log format.
+const struct ReadLogProcessStruct ReadSquidLog=
+{
+	/* TRANSLATORS: This is the name of the log format displayed when this format is detected in an input log file. */
+	N_("squid log format"),
+	Squid_NewFile,
+	Squid_ReadEntry
+};
-- 
2.47.2