From: Frederic Marchal Date: Fri, 24 Jul 2015 07:48:44 +0000 (+0200) Subject: Read an extended log even if cs-uri is split over several columns X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e927e4230cd47ae0e265096895f4c5f9f7fea79f;p=thirdparty%2Fsarg.git Read an extended log even if cs-uri is split over several columns Sarg used to require that the visited URL be stored in column cs-uri of the extended log format. But the URL can be split over the cs-uri-scheme, cs-host, cs-uri-port, cs-uri-path and cs-uri-query columns. Sarg detects the columns and re-creates the full URL if cs-uri is not found. --- diff --git a/readlog_extlog.c b/readlog_extlog.c index 74c3eb8..9718008 100644 --- a/readlog_extlog.c +++ b/readlog_extlog.c @@ -44,6 +44,11 @@ enum ext_col_id { EXTCOL_TimeTaken, EXTCOL_Bytes, EXTCOL_Uri, + EXTCOL_Scheme, + EXTCOL_Host, + EXTCOL_Port, + EXTCOL_Path, + EXTCOL_Query, EXTCOL_Status, EXTCOL_Last //last entry of the list ! }; @@ -56,6 +61,8 @@ static int ExtCols[EXTCOL_Last]; static char ExtColSep[MAX_EXT_COLUMNS]; //! The number of columns according to the "fields" directive. static int ExtColNumber; +//! Temporary buffer to concatenate the url. +static char ExtTempUrl[MAX_URL_LEN]; /*! A new file is being read. The name of the file is \a FileName. 
@@ -138,6 +145,8 @@ static bool ExtLog_Fields(const char *columns) if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip; } else if (len==6) { if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri; + } else if (len==7) { + if (strncasecmp(columns,"cs-host",len)==0) col_id=EXTCOL_Host; } else if (len==8) { if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes; } else if (len==9) { @@ -146,6 +155,12 @@ static bool ExtLog_Fields(const char *columns) if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken; } else if (len==11) { if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName; + if (strncasecmp(columns,"cs-uri-port",len)==0) col_id=EXTCOL_Port; + if (strncasecmp(columns,"cs-uri-path",len)==0) col_id=EXTCOL_Path; + } else if (len==12) { + if (strncasecmp(columns,"cs-uri-query",len)==0) col_id=EXTCOL_Query; + } else if (len==13) { + if (strncasecmp(columns,"cs-uri-scheme",len)==0) col_id=EXTCOL_Scheme; } if (col_id!=EXTCOL_Last) { ExtCols[col_id]=col; @@ -369,6 +384,86 @@ static void ExtLog_FixString(char *string,char *end_ptr) *dest='\0'; } +/*! + * Discard an empty string. + * + * A string holding only a single dash is also treated as empty. + * + * \param String The string to check. + * + * \return The string pointer if it isn't empty or NULL if the string + * is empty. + */ +static const char *ExtLog_FixEmptyString(const char *String) +{ + if (String && (String[0]=='\0' || (String[0]=='-' && String[1]=='\0'))) String=NULL; + return(String); +} + +/*! + * Create the URL from the split elements. 
+ */
+static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Port,const char *Path,const char *Query)
+{
+	int tlen=0; // number of characters already stored in ExtTempUrl
+	int len;
+
+	Host=ExtLog_FixEmptyString(Host);
+	if (!Host) return(NULL); // no URL is built when the host is missing, empty or "-"
+	Scheme=ExtLog_FixEmptyString(Scheme);
+	Port=ExtLog_FixEmptyString(Port);
+	Path=ExtLog_FixEmptyString(Path);
+	Query=ExtLog_FixEmptyString(Query);
+
+	if (Scheme)
+	{
+		len=strlen(Scheme);
+		if (tlen+len+3>=sizeof(ExtTempUrl)) // the scheme and "://" are never truncated: abort instead
+		{
+			debuga(__FILE__,__LINE__,_("URI scheme too long in log file\n"));
+			exit(EXIT_FAILURE);
+		}
+		strcpy(ExtTempUrl,Scheme);
+		strcpy(ExtTempUrl+len,"://");
+		tlen+=len+3;
+	}
+
+	len=strlen(Host);
+	if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the terminator
+	strncpy(ExtTempUrl+tlen,Host,len);
+	tlen+=len;
+	ExtTempUrl[tlen]='\0';
+
+	if (tlen+2<sizeof(ExtTempUrl) && Port) // needs room for ':', one port character and the terminator
+	{
+		len=strlen(Port);
+		if (tlen+len+1>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-2; // truncate, keeping room for ':' and the terminator
+		ExtTempUrl[tlen++]=':';
+		strncpy(ExtTempUrl+tlen,Port,len);
+		tlen+=len;
+		ExtTempUrl[tlen]='\0';
+	}
+
+	if (tlen<sizeof(ExtTempUrl) && Path)
+	{
+		len=strlen(Path);
+		if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the terminator
+		strncpy(ExtTempUrl+tlen,Path,len);
+		tlen+=len;
+		ExtTempUrl[tlen]='\0';
+	}
+
+	if (tlen<sizeof(ExtTempUrl) && Query) // the query is appended as logged, no separator is inserted
+	{
+		len=strlen(Query);
+		if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the terminator
+		strncpy(ExtTempUrl+tlen,Query,len);
+		tlen+=len;
+		ExtTempUrl[tlen]='\0';
+	}
+	return(ExtTempUrl); // static buffer: overwritten by the next call
+}
+
 /*!
 Read one entry from an extended log.
 
@@ -388,6 +483,11 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru char *UserEnd; char *UrlEnd; char *HttpCodeEnd; + char *UrlScheme=NULL,*UrlSchemeEnd; + char *UrlHost=NULL,*UrlHostEnd; + char *UrlPort=NULL,*UrlPortEnd; + char *UrlPath=NULL,*UrlPathEnd; + char *UrlQuery=NULL,*UrlQueryEnd; // is it a directive if (*Line=='#') { @@ -437,6 +537,31 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru Line=ExtLog_GetString(Line,col,&UrlEnd); if (!Line) return(RLRC_Unknown); break; + case EXTCOL_Scheme: + UrlScheme=Line; + Line=ExtLog_GetString(Line,col,&UrlSchemeEnd); + if (!Line) return(RLRC_Unknown); + break; + case EXTCOL_Host: + UrlHost=Line; + Line=ExtLog_GetString(Line,col,&UrlHostEnd); + if (!Line) return(RLRC_Unknown); + break; + case EXTCOL_Port: + UrlPort=Line; + Line=ExtLog_GetString(Line,col,&UrlPortEnd); + if (!Line) return(RLRC_Unknown); + break; + case EXTCOL_Path: + UrlPath=Line; + Line=ExtLog_GetString(Line,col,&UrlPathEnd); + if (!Line) return(RLRC_Unknown); + break; + case EXTCOL_Query: + UrlQuery=Line; + Line=ExtLog_GetString(Line,col,&UrlQueryEnd); + if (!Line) return(RLRC_Unknown); + break; case EXTCOL_Status: Entry->HttpCode=Line; Line=ExtLog_GetString(Line,col,&HttpCodeEnd); @@ -466,6 +591,15 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru ExtLog_FixString(Entry->User,UserEnd); ExtLog_FixString(Entry->Url,UrlEnd); ExtLog_FixString(Entry->HttpCode,HttpCodeEnd); + if (!Entry->Url) + { + ExtLog_FixString(UrlScheme,UrlSchemeEnd); + ExtLog_FixString(UrlHost,UrlHostEnd); + ExtLog_FixString(UrlPort,UrlPortEnd); + ExtLog_FixString(UrlPath,UrlPathEnd); + ExtLog_FixString(UrlQuery,UrlQueryEnd); + Entry->Url=ExtLog_ConcatUrl(UrlScheme,UrlHost,UrlPort,UrlPath,UrlQuery); + } return(RLRC_NoError); }