git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Ignore empty URL in extended log format
author Frederic Marchal <fmarchal@users.sourceforge.net>
Thu, 20 Aug 2015 18:16:14 +0000 (20:16 +0200)
committer Frederic Marchal <fmarchal@users.sourceforge.net>
Thu, 20 Aug 2015 18:16:14 +0000 (20:16 +0200)
The sample log provided here:
https://sourceforge.net/p/sarg/discussion/234317/thread/224c3daa/#a5ca
shows that some lines can have an empty URL.

Such lines appear to denote failed connection attempts to unavailable
resources. Instead of aborting the processing, we ignore such lines.

The summary presented when debug output is enabled shows how many lines
have been ignored.

readlog_extlog.c

index 4498497a3cbb3aaa01ac150f3f1537d6dc2cac43..33491d5b774631263824e1fe5f06f30aea051593 100644 (file)
@@ -408,9 +408,22 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po
        int tlen=0;
        int len;
 
-       Host=ExtLog_FixEmptyString(Host);
-       if (!Host) return(NULL);
        Scheme=ExtLog_FixEmptyString(Scheme);
+       Host=ExtLog_FixEmptyString(Host);
+       if (!Scheme && !Host)
+       {
+               /*
+                * Example of such an entry:
+                *
+                * #Fields:
+                * date time time-taken c-ip sc-status s-action sc-bytes cs-bytes cs-method cs-uri-scheme cs-host cs-uri-port cs-uri-path cs-uri-query cs-username cs-auth-group s-hierarchy s-supplier-name rs(Content-Type) cs(Referer) cs(User-Agent) sc-filter-result cs-categories x-virus-id s-ip
+                * 2015-07-29 06:05:50 30 192.168.1.21 400 TCP_NC_MISS 903 8163 unknown - - 0 / - userid - - 10.81.0.26 - - - DENIED "unavailable" - 10.81.0.26 - - ICAP_NOT_SCANNED - - -
+                *
+                * It looks like a failed connection attempt to an unavailable resource. Let's assume it is safe to ignore it.
+                */
+               ExtTempUrl[0]='\0';
+               return(ExtTempUrl);
+       }
        Port=ExtLog_FixEmptyString(Port);
        Path=ExtLog_FixEmptyString(Path);
        Query=ExtLog_FixEmptyString(Query);
@@ -428,11 +441,13 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po
                tlen+=len+3;
        }
 
-       len=strlen(Host);
-       if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1;
-       strncpy(ExtTempUrl+tlen,Host,len);
-       tlen+=len;
-       ExtTempUrl[tlen]='\0';
+       if (Host)
+       {
+               len=strlen(Host);
+               if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1;
+               strncpy(ExtTempUrl+tlen,Host,len);
+               tlen+=len;
+       }
 
        if (tlen+2<sizeof(ExtTempUrl) && Port)
        {
@@ -441,7 +456,6 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po
                ExtTempUrl[tlen++]=':';
                strncpy(ExtTempUrl+tlen,Port,len);
                tlen+=len;
-               ExtTempUrl[tlen]='\0';
        }
 
        if (tlen<sizeof(ExtTempUrl) && Path)
@@ -450,7 +464,6 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po
                if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1;
                strncpy(ExtTempUrl+tlen,Path,len);
                tlen+=len;
-               ExtTempUrl[tlen]='\0';
        }
 
        if (tlen<sizeof(ExtTempUrl) && Query)
@@ -459,8 +472,8 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po
                if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1;
                strncpy(ExtTempUrl+tlen,Query,len);
                tlen+=len;
-               ExtTempUrl[tlen]='\0';
        }
+       ExtTempUrl[tlen]='\0';
        return(ExtTempUrl);
 }