]> git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Read an extended log even if cs-uri is split over several columns
authorFrederic Marchal <fmarchal@users.sourceforge.net>
Fri, 24 Jul 2015 07:48:44 +0000 (09:48 +0200)
committerFrederic Marchal <fmarchal@users.sourceforge.net>
Fri, 24 Jul 2015 07:48:44 +0000 (09:48 +0200)
Sarg used to require that the visited URL be stored in column cs-uri of the
extended log format. But the URL can be split over the cs-uri-scheme,
cs-host, cs-uri-port, cs-uri-path and cs-uri-query columns.

Sarg detects the columns and re-creates the full URL if cs-uri is not
found.

readlog_extlog.c

index 74c3eb874d6e6a7ab254466e14a3af318fcdef5f..9718008b77aadbc58fccbc0b35e0a47565746815 100644 (file)
@@ -44,6 +44,11 @@ enum ext_col_id {
        EXTCOL_TimeTaken,
        EXTCOL_Bytes,
        EXTCOL_Uri,
+       EXTCOL_Scheme,
+       EXTCOL_Host,
+       EXTCOL_Port,
+       EXTCOL_Path,
+       EXTCOL_Query,
        EXTCOL_Status,
        EXTCOL_Last //last entry of the list !
 };
@@ -56,6 +61,8 @@ static int ExtCols[EXTCOL_Last];
 static char ExtColSep[MAX_EXT_COLUMNS];
 //! The number of columns according to the "fields" directive.
 static int ExtColNumber;
+//! Temporary buffer to concatenate the url.
+static char ExtTempUrl[MAX_URL_LEN];
 
 /*!
 A new file is being read. The name of the file is \a FileName.
@@ -138,6 +145,8 @@ static bool ExtLog_Fields(const char *columns)
                        if (strncasecmp(columns,"c-dns",len)==0) col_id=EXTCOL_Ip;
                } else if (len==6) {
                        if (strncasecmp(columns,"cs-uri",len)==0) col_id=EXTCOL_Uri;
+               } else if (len==7) {
+                       if (strncasecmp(columns,"cs-host",len)==0) col_id=EXTCOL_Host;
                } else if (len==8) {
                        if (strncasecmp(columns,"sc-bytes",len)==0) col_id=EXTCOL_Bytes;
                } else if (len==9) {
@@ -146,6 +155,12 @@ static bool ExtLog_Fields(const char *columns)
                        if (strncasecmp(columns,"time-taken",len)==0) col_id=EXTCOL_TimeTaken;
                } else if (len==11) {
                        if (strncasecmp(columns,"cs-username",len)==0) col_id=EXTCOL_UserName;
+                       if (strncasecmp(columns,"cs-uri-port",len)==0) col_id=EXTCOL_Port;
+                       if (strncasecmp(columns,"cs-uri-path",len)==0) col_id=EXTCOL_Path;
+               } else if (len==12) {
+                       if (strncasecmp(columns,"cs-uri-query",len)==0) col_id=EXTCOL_Query;
+               } else if (len==13) {
+                       if (strncasecmp(columns,"cs-uri-scheme",len)==0) col_id=EXTCOL_Scheme;
                }
                if (col_id!=EXTCOL_Last) {
                        ExtCols[col_id]=col;
@@ -369,6 +384,86 @@ static void ExtLog_FixString(char *string,char *end_ptr)
        *dest='\0';
 }
 
+/*!
+ * Turn an empty string into a NULL pointer.
+ *
+ * A string is considered empty if it contains no character at all or if
+ * it consists of exactly one dash ("-").
+ *
+ * \param String The string to check. It may be NULL.
+ *
+ * \return The unchanged string pointer if the string isn't empty, or NULL
+ * if the string is empty or if \a String was already NULL.
+ */
+static const char *ExtLog_FixEmptyString(const char *String)
+{
+       if (String && (String[0]=='\0' || (String[0]=='-' && String[1]=='\0'))) String=NULL;
+       return(String);
+}
+
+/*!
+ * Create the URL from the split elements.
+ *
+ * The result is built in the file-scope buffer ExtTempUrl as
+ * [Scheme "://"] Host [":" Port] [Path] [Query]. Any element that is
+ * NULL, empty or a single dash is left out. Path and Query are appended
+ * verbatim: no separator is inserted in front of them.
+ *
+ * Host, Port, Path and Query are silently truncated so that the result,
+ * including the terminating NUL, fits in ExtTempUrl. If Scheme followed
+ * by "://" doesn't fit, debuga() is called with an error message and the
+ * program exits with EXIT_FAILURE.
+ *
+ * \param Scheme The URL scheme without the "://" separator.
+ * \param Host The host name. It is mandatory.
+ * \param Port The port as a string, without the leading colon.
+ * \param Path The path of the URL.
+ * \param Query The query of the URL.
+ *
+ * \return A pointer to ExtTempUrl, whose content is overwritten by every
+ * call, or NULL if \a Host is NULL, empty or a single dash.
+ */
+static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Port,const char *Path,const char *Query)
+{
+       int tlen=0; // number of characters already stored in ExtTempUrl
+       int len; // length of the element being appended
+
+       Host=ExtLog_FixEmptyString(Host);
+       if (!Host) return(NULL); // no URL can be built without a host
+       Scheme=ExtLog_FixEmptyString(Scheme);
+       Port=ExtLog_FixEmptyString(Port);
+       Path=ExtLog_FixEmptyString(Path);
+       Query=ExtLog_FixEmptyString(Query);
+
+       if (Scheme)
+       {
+               len=strlen(Scheme);
+               if (tlen+len+3>=sizeof(ExtTempUrl)) // +3 accounts for "://"
+               {
+                       debuga(__FILE__,__LINE__,_("URI scheme too long in log file\n"));
+                       exit(EXIT_FAILURE);
+               }
+               strcpy(ExtTempUrl,Scheme);
+               strcpy(ExtTempUrl+len,"://");
+               tlen+=len+3;
+       }
+
+       len=strlen(Host);
+       if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the NUL
+       strncpy(ExtTempUrl+tlen,Host,len);
+       tlen+=len;
+       ExtTempUrl[tlen]='\0'; // strncpy doesn't terminate a truncated copy
+
+       if (tlen+2<sizeof(ExtTempUrl) && Port) // needs room for ':', one character and the NUL
+       {
+               len=strlen(Port);
+               if (tlen+len+1>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-2; // truncate, keeping room for ':' and the NUL
+               ExtTempUrl[tlen++]=':';
+               strncpy(ExtTempUrl+tlen,Port,len);
+               tlen+=len;
+               ExtTempUrl[tlen]='\0';
+       }
+
+       if (tlen<sizeof(ExtTempUrl) && Path)
+       {
+               len=strlen(Path);
+               if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the NUL
+               strncpy(ExtTempUrl+tlen,Path,len);
+               tlen+=len;
+               ExtTempUrl[tlen]='\0';
+       }
+
+       if (tlen<sizeof(ExtTempUrl) && Query)
+       {
+               len=strlen(Query);
+               if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; // truncate, keeping one byte for the NUL
+               strncpy(ExtTempUrl+tlen,Query,len);
+               tlen+=len;
+               ExtTempUrl[tlen]='\0';
+       }
+       return(ExtTempUrl);
+}
+
 /*!
 Read one entry from an extended log.
 
@@ -388,6 +483,11 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru
        char *UserEnd;
        char *UrlEnd;
        char *HttpCodeEnd;
+       char *UrlScheme=NULL,*UrlSchemeEnd;
+       char *UrlHost=NULL,*UrlHostEnd;
+       char *UrlPort=NULL,*UrlPortEnd;
+       char *UrlPath=NULL,*UrlPathEnd;
+       char *UrlQuery=NULL,*UrlQueryEnd;
 
        // is it a directive
        if (*Line=='#') {
@@ -437,6 +537,31 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru
                                Line=ExtLog_GetString(Line,col,&UrlEnd);
                                if (!Line) return(RLRC_Unknown);
                                break;
+                       case EXTCOL_Scheme:
+                               UrlScheme=Line;
+                               Line=ExtLog_GetString(Line,col,&UrlSchemeEnd);
+                               if (!Line) return(RLRC_Unknown);
+                               break;
+                       case EXTCOL_Host:
+                               UrlHost=Line;
+                               Line=ExtLog_GetString(Line,col,&UrlHostEnd);
+                               if (!Line) return(RLRC_Unknown);
+                               break;
+                       case EXTCOL_Port:
+                               UrlPort=Line;
+                               Line=ExtLog_GetString(Line,col,&UrlPortEnd);
+                               if (!Line) return(RLRC_Unknown);
+                               break;
+                       case EXTCOL_Path:
+                               UrlPath=Line;
+                               Line=ExtLog_GetString(Line,col,&UrlPathEnd);
+                               if (!Line) return(RLRC_Unknown);
+                               break;
+                       case EXTCOL_Query:
+                               UrlQuery=Line;
+                               Line=ExtLog_GetString(Line,col,&UrlQueryEnd);
+                               if (!Line) return(RLRC_Unknown);
+                               break;
                        case EXTCOL_Status:
                                Entry->HttpCode=Line;
                                Line=ExtLog_GetString(Line,col,&HttpCodeEnd);
@@ -466,6 +591,15 @@ static enum ReadLogReturnCodeEnum ExtLog_ReadEntry(char *Line,struct ReadLogStru
        ExtLog_FixString(Entry->User,UserEnd);
        ExtLog_FixString(Entry->Url,UrlEnd);
        ExtLog_FixString(Entry->HttpCode,HttpCodeEnd);
+       if (!Entry->Url)
+       {
+               ExtLog_FixString(UrlScheme,UrlSchemeEnd);
+               ExtLog_FixString(UrlHost,UrlHostEnd);
+               ExtLog_FixString(UrlPort,UrlPortEnd);
+               ExtLog_FixString(UrlPath,UrlPathEnd);
+               ExtLog_FixString(UrlQuery,UrlQueryEnd);
+               Entry->Url=ExtLog_ConcatUrl(UrlScheme,UrlHost,UrlPort,UrlPath,UrlQuery);
+       }
 
        return(RLRC_NoError);
 }