From: Frederic Marchal Date: Thu, 20 Aug 2015 18:16:14 +0000 (+0200) Subject: Ignore empty URL in extended log format X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7985798a972f0777641f374afdffeceaaa34a763;p=thirdparty%2Fsarg.git Ignore empty URL in extended log format The sample log provided here: https://sourceforge.net/p/sarg/discussion/234317/thread/224c3daa/#a5ca shows that some lines can have an empty URL. It looks like it denotes failed connection attempts to unavailable resources. Instead of aborting the processing, we ignore such lines. The summary presented when debug output is enabled shows how many lines have been ignored. --- diff --git a/readlog_extlog.c b/readlog_extlog.c index 4498497..33491d5 100644 --- a/readlog_extlog.c +++ b/readlog_extlog.c @@ -408,9 +408,22 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po int tlen=0; int len; - Host=ExtLog_FixEmptyString(Host); - if (!Host) return(NULL); Scheme=ExtLog_FixEmptyString(Scheme); + Host=ExtLog_FixEmptyString(Host); + if (!Scheme && !Host) + { + /* + * Example of such an entry: + * + * #Fields: + * date time time-taken c-ip sc-status s-action sc-bytes cs-bytes cs-method cs-uri-scheme cs-host cs-uri-port cs-uri-path cs-uri-query cs-username cs-auth-group s-hierarchy s-supplier-name rs(Content-Type) cs(Referer) cs(User-Agent) sc-filter-result cs-categories x-virus-id s-ip + * 2015-07-29 06:05:50 30 192.168.1.21 400 TCP_NC_MISS 903 8163 unknown - - 0 / - userid - - 10.81.0.26 - - - DENIED "unavailable" - 10.81.0.26 - - ICAP_NOT_SCANNED - - - + * + * It looks like a failed connection attempt to an unavailable resource. Let's assume it is safe to ignore it. + */ + ExtTempUrl[0]='\0'; + return(ExtTempUrl); + } Port=ExtLog_FixEmptyString(Port); Path=ExtLog_FixEmptyString(Path); Query=ExtLog_FixEmptyString(Query); @@ -428,11 +441,13 @@ static char *ExtLog_ConcatUrl(const char *Scheme,const char *Host,const char *Po tlen+=len+3; } - len=strlen(Host); - if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; - strncpy(ExtTempUrl+tlen,Host,len); - tlen+=len; - ExtTempUrl[tlen]='\0'; + if (Host) + { + len=strlen(Host); + if (tlen+len>=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; + strncpy(ExtTempUrl+tlen,Host,len); + tlen+=len; + } if (tlen+2=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; strncpy(ExtTempUrl+tlen,Path,len); tlen+=len; - ExtTempUrl[tlen]='\0'; } if (tlen=sizeof(ExtTempUrl)) len=sizeof(ExtTempUrl)-tlen-1; strncpy(ExtTempUrl+tlen,Query,len); tlen+=len; - ExtTempUrl[tlen]='\0'; } + ExtTempUrl[tlen]='\0'; return(ExtTempUrl); }