git.ipfire.org Git - thirdparty/sarg.git/blobdiff - url.c
Rename configure.in as configure.ac
[thirdparty/sarg.git] / url.c
diff --git a/url.c b/url.c
index 668e0635a77c4511249fe39296a05b8ec6cf59d2..8c55610c57fb574762b15007b5ae5bd80f42ca2b 100644 (file)
--- a/url.c
+++ b/url.c
@@ -1,6 +1,6 @@
 /*
  * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
- *                                                            1998, 2012
+ *                                                            1998, 2015
  *
  * SARG donations:
  *      please look at http://sarg.sourceforge.net/donations.php
@@ -29,8 +29,6 @@
 #ifdef HAVE_PCRE_H
 #include <pcre.h>
 #define USE_PCRE
-#else
-#error "PCRE not compiled in"
 #endif
 
 /*!
@@ -96,6 +94,8 @@ struct hostalias_regex
        pcre *Re;
        //! The replacement name.
        const char *Alias;
+       //! \c True if the replacement name refers to at least one subpattern (\1 or $1)
+       bool SubPartern;
 };
 #endif
 
@@ -147,14 +147,14 @@ static int Alias_StoreName(const char *name,const char *next)
                NameEnd=++str;
                while (str<next && (unsigned char)*str>' ') {
                        if (*str=='*') {
-                               debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
+                               debuga(__FILE__,__LINE__,_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
                                return(-1);
                        }
                        str++;
                }
                NameEndE=str;
                if (NameEnd==NameEndE) {
-                       debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
+                       debuga(__FILE__,__LINE__,_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
                        return(-1);
                }
        } else {
@@ -181,7 +181,7 @@ static int Alias_StoreName(const char *name,const char *next)
                len=(int)(NameBeginE-NameBegin);
                tmp=malloc(len+1);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                memcpy(tmp,NameBegin,len);
@@ -193,14 +193,14 @@ static int Alias_StoreName(const char *name,const char *next)
                tmp=malloc(len+1);
                if (!tmp) {
                        if (NameBegin) free((void*)NameBegin);
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                memcpy(tmp,NameEnd,len);
                tmp[len]='\0';
                NameEnd=tmp;
        }
-       
+
        // ignore duplicates
        prev_alias=NULL;
        for (alias=FirstAliasName ; alias ; alias=alias->Next) {
@@ -217,7 +217,7 @@ static int Alias_StoreName(const char *name,const char *next)
        if (!new_alias) {
                if (NameBegin) free((void*)NameBegin);
                if (NameEnd) free((void*)NameEnd);
-               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                return(-1);
        }
        new_alias->MinLen=0;
@@ -241,7 +241,7 @@ static int Alias_StoreName(const char *name,const char *next)
                len=(int)(ReplaceE-Replace);
                tmp=malloc(len+2);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                tmp[0]=ALIAS_PREFIX;
@@ -251,7 +251,7 @@ static int Alias_StoreName(const char *name,const char *next)
        } else {
                tmp=malloc(new_alias->MinLen+2);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                tmp[0]=ALIAS_PREFIX;
@@ -262,7 +262,7 @@ static int Alias_StoreName(const char *name,const char *next)
                }
                new_alias->Alias=tmp;
        }
-               
+
        new_alias->Next=NULL;
        if (prev_alias)
                prev_alias->Next=new_alias;
@@ -317,7 +317,7 @@ static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
        // insert into the list
        new_alias=malloc(sizeof(*new_alias));
        if (!new_alias) {
-               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                return(-1);
        }
        memcpy(new_alias->Ip,ipv4,4);
@@ -326,7 +326,7 @@ static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
                len=(int)(ReplaceE-Replace);
                tmp=malloc(len+2);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                tmp[0]=ALIAS_PREFIX;
@@ -336,13 +336,13 @@ static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
        } else {
                tmp=malloc(5*4+1);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
                new_alias->Alias=tmp;
        }
-               
+
        if (prev_alias) {
                new_alias->Next=prev_alias->Next;
                prev_alias->Next=new_alias;
@@ -399,7 +399,7 @@ static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
        // insert into the list
        new_alias=malloc(sizeof(*new_alias));
        if (!new_alias) {
-               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                return(-1);
        }
        memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
@@ -408,7 +408,7 @@ static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
                len=ReplaceE-Replace;
                tmp=malloc(len+2);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                tmp[0]=ALIAS_PREFIX;
@@ -418,13 +418,13 @@ static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
        } else {
                tmp=malloc(5*8+5);
                if (!tmp) {
-                       debuga(_("Not enough memory to store the host name aliasing directives\n"));
+                       debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                        return(-1);
                }
                sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
                new_alias->Alias=tmp;
        }
-               
+
        if (prev_alias) {
                new_alias->Next=prev_alias->Next;
                prev_alias->Next=new_alias;
@@ -455,46 +455,47 @@ static int Alias_StoreRegexp(char *buf)
        char *Replace;
        int len;
        char *tmp;
-       
+       int i;
+
        // find the pattern
        Delimiter=*buf++;
        for (End=buf ; *End && *End!=Delimiter ; End++) {
                if (*End=='\\') {
                        if (End[1]=='\0') {
-                               debuga(_("Invalid NUL character found in regular expression\n"));
+                               debuga(__FILE__,__LINE__,_("Invalid NUL character found in regular expression\n"));
                                return(-1);
                        }
                        End++; //ignore the escaped character
                }
        }
        if (*End!=Delimiter) {
-               debuga(_("Unterminated regular expression\n"));
+               debuga(__FILE__,__LINE__,_("Unterminated regular expression\n"));
                return(-1);
        }
        *End++='\0';
-       
+
        // find the alias
        for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
        for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
        *End='\0';
-               
+
        // store it
        new_alias=malloc(sizeof(*new_alias));
        if (!new_alias) {
-               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                return(-1);
        }
        new_alias->Next=NULL;
        new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
        if (new_alias->Re==NULL) {
-               debuga(_("Failed to compile the regular expression: %s\n"),PcreError);
+               debuga(__FILE__,__LINE__,_("Failed to compile the regular expression \"%s\": %s\n"),buf,PcreError);
                free(new_alias);
                return(-1);
        }
        len=strlen(Replace);
        tmp=malloc(len+2);
        if (!tmp) {
-               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
                pcre_free(new_alias->Re);
                return(-1);
        }
@@ -502,13 +503,21 @@ static int Alias_StoreRegexp(char *buf)
        memcpy(tmp+1,Replace,len);
        tmp[len+1]='\0';
        new_alias->Alias=tmp;
-       
+
+       new_alias->SubPartern=false;
+       for (i=1 ; tmp[i] ; i++)
+               // both the sed \1 and the perl $1 replacement operators are accepted
+               if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit(tmp[i+1])) {
+                       new_alias->SubPartern=true;
+                       break;
+               }
+
        // chain it
        prev_alias=&FirstAliasRe;
        for (alias=FirstAliasRe ; alias ; alias=alias->Next)
                prev_alias=&alias->Next;
        *prev_alias=new_alias;
-               
+
        return(1);
 }
 #endif
@@ -520,6 +529,7 @@ Store an alias in the corresponding list.
 
 \retval 0 No error.
 \retval -1 Error in file.
+\retval -2 Unknown string type to store.
 */
 static int Alias_Store(char *buf)
 {
@@ -529,15 +539,15 @@ static int Alias_Store(char *buf)
        unsigned short int ipv6[8];
        int nbits;
        const char *next;
-       int Error;
-       
+       int Error=-2;
+
        if (strncasecmp(buf,"re:",3)==0) {
 #ifdef USE_PCRE
                if (Alias_StoreRegexp(buf+3)<0)
                        return(-1);
                return(0);
 #else
-               debuga(_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
+               debuga(__FILE__,__LINE__,_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
                return(-1);
 #endif
        }
@@ -568,47 +578,50 @@ void read_hostalias(const char *Filename)
        longline line;
        char *buf;
 
-       if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
+       if (debug) debuga(__FILE__,__LINE__,_("Reading host alias file \"%s\"\n"),Filename);
        fi=fopen(Filename,"rt");
        if (!fi) {
-               debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
+               debuga(__FILE__,__LINE__,_("Cannot read host name alias file \"%s\": %s\n"),Filename,strerror(errno));
                exit(EXIT_FAILURE);
        }
-       
+
        if ((line=longline_create())==NULL) {
-               debuga(_("Not enough memory to read the host name aliases\n"));
+               debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),Filename);
                exit(EXIT_FAILURE);
        }
 
        while ((buf=longline_read(fi,line)) != NULL) {
                if (Alias_Store(buf)<0) {
-                       debuga(_("While reading \"%s\"\n"),Filename);
+                       debuga(__FILE__,__LINE__,_("While reading \"%s\"\n"),Filename);
                        exit(EXIT_FAILURE);
                }
        }
-       
+
        longline_destroy(&line);
-       fclose(fi);
-       
+       if (fclose(fi)==EOF) {
+               debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),Filename,strerror(errno));
+               exit(EXIT_FAILURE);
+       }
+
        if (debug) {
                struct hostalias_name *alias1;
                struct hostalias_ipv4 *alias4;
                struct hostalias_ipv6 *alias6;
 
-               debuga(_("List of host names to alias:\n"));
+               debuga(__FILE__,__LINE__,_("List of host names to alias:\n"));
                for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
                        if (alias1->HostName_Prefix && alias1->HostName_Suffix)
-                               debuga(_("  %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
+                               debuga(__FILE__,__LINE__,_("  %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
                        else if (alias1->HostName_Prefix)
-                               debuga(_("  %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
+                               debuga(__FILE__,__LINE__,_("  %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
                        else
-                               debuga(_("  *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
+                               debuga(__FILE__,__LINE__,_("  *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
                }
                for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
-                       debuga(_("  %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
+                       debuga(__FILE__,__LINE__,_("  %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
                }
                for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
-                       debuga(_("  %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
+                       debuga(__FILE__,__LINE__,_("  %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
                                alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
                }
        }
@@ -622,7 +635,7 @@ void free_hostalias(void)
        {
                struct hostalias_name *alias1;
                struct hostalias_name *next1;
-               
+
                for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
                        next1=alias1->Next;
                        if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
@@ -635,7 +648,7 @@ void free_hostalias(void)
        {
                struct hostalias_ipv4 *alias4;
                struct hostalias_ipv4 *next4;
-               
+
                for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
                        next4=alias4->Next;
                        free((void *)alias4->Alias);
@@ -646,7 +659,7 @@ void free_hostalias(void)
        {
                struct hostalias_ipv6 *alias6;
                struct hostalias_ipv6 *next6;
-               
+
                for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
                        next6=alias6->Next;
                        free((void *)alias6->Alias);
@@ -658,7 +671,7 @@ void free_hostalias(void)
        {
                struct hostalias_regex *alias;
                struct hostalias_regex *next;
-               
+
                for (alias=FirstAliasRe ; alias ; alias=next) {
                        next=alias->Next;
                        pcre_free(alias->Re);
@@ -717,7 +730,7 @@ static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
        int len;
 
        for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
-               len=alias->NBits;       
+               len=alias->NBits;
                if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
                        return(alias->Alias);
                }
@@ -755,20 +768,64 @@ static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
 /*!
 Replace the host name by its alias if it is in our list.
 
-\param url The host name.
+\param url_ptr A pointer to the host name to match. It is replaced
+by a pointer to the alias if a match is found.
 
-\return The pointer to the host name or its alias.
+\return \c True if a match is found or \c false if it failed.
+
+\warning The function is not thread safe as it may return a static
+internal buffer.
 */
-static const char *alias_url_regex(const char *url)
+static bool alias_url_regex(const char **url_ptr)
 {
        struct hostalias_regex *alias;
+       int nmatches;
+       const char *url;
+       int url_len;
+       int ovector[30];//size must be a multiple of 3 (pcre_exec uses the last third as workspace)
+       static char Replacement[1024];
+       const char *str;
+       int i;
+       int sub;
+       int repl_idx;
 
+       url=*url_ptr;
+       url_len=strlen(url);
        for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
-               if (pcre_exec(alias->Re,NULL,url,strlen(url),0,0,NULL,0)==0) {
-                       return(alias->Alias);
+               nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0]));
+               if (nmatches>=0) {
+                       if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3; //vector too small: only size/3 offset pairs were stored
+                       if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace
+                               *url_ptr=alias->Alias;
+                       } else {
+                               repl_idx=0;
+                               str=alias->Alias;
+                               for (i=0 ; str[i] ; i++) {
+                                       // both the sed \1 and the perl $1 replacement operators are accepted
+                                       if ((str[i]=='\\' || str[i]=='$') && isdigit((unsigned char)str[i+1])) {
+                                               sub=str[++i]-'0';
+                                               if (sub>=1 && sub<nmatches) {
+                                                       /*
+                                                        * pcre_exec counts the whole match as one, so the valid subpatterns are 1..nmatches-1.
+                                                        * Once sub is doubled, ovector[sub] is the start offset and ovector[sub+1] the end offset.
+                                                        */
+                                                       sub<<=1;
+                                                       if (repl_idx+ovector[sub+1]-ovector[sub]>=sizeof(Replacement)-1) break;
+                                                       if (ovector[sub+1]>ovector[sub]) memcpy(Replacement+repl_idx,url+ovector[sub],ovector[sub+1]-ovector[sub]); //unset groups are -1/-1
+                                                       repl_idx+=ovector[sub+1]-ovector[sub];
+                                                       continue;
+                                               }
+                                       }
+                                       if (repl_idx>=sizeof(Replacement)-1) break;
+                                       Replacement[repl_idx++]=str[i];
+                               }
+                               Replacement[repl_idx]='\0';
+                               *url_ptr=Replacement;
+                       }
+                       return(true);
                }
        }
-       return(url);
+       return(false);
 }
 #endif
 
@@ -782,7 +839,7 @@ Find the beginning of the URL beyond the scheme://
 const char *skip_scheme(const char *url)
 {
        const char *str;
-       
+
        /*
        Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
        The underscore is not part of the standard but is found in the squid logs as cache_object://.
@@ -802,27 +859,28 @@ Get the part of the URL necessary to generate the report.
 \param full_url \c True to keep the whole URL. If \c false,
 the URL is truncated to only keep the host name and port number.
 */
-const char *process_url(char *url,bool full_url)
+const char *process_url(const char *url,bool full_url)
 {
-       char *str;
+       static char short_url[1024];
+       int i;
        const char *start;
        int type;
-       const char *address;
        unsigned char ipv4[4];
        unsigned short int ipv6[8];
        const char *next;
 
        start=skip_scheme(url);
        if (!full_url) {
-               for (str=(char *)start ; *str && *str!='/' && *str!='?' ; str++);
-               *str='\0';
+               for (i=0 ; i<sizeof(short_url)-1 && start[i] && start[i]!='/' && start[i]!='?' ; i++)
+                       short_url[i]=start[i];
+               short_url[i]='\0';
+               start=short_url;
 #ifdef USE_PCRE
                if (FirstAliasRe) {
-                       address=alias_url_regex(start);
-                       if (address!=start) return(address);
+                       if (alias_url_regex(&start)) return(start);
                }
 #endif
-               type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
+               type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next);
                if (type==1) {
                        if (FirstAliasName)
                                start=alias_url_name(start,next);