git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Accept subpatterns in the host alias
author Frédéric Marchal <fmarchal@users.sourceforge.net>
Sat, 10 Mar 2012 11:01:25 +0000 (12:01 +0100)
committer Frédéric Marchal <fmarchal@users.sourceforge.net>
Sat, 10 Mar 2012 11:01:25 +0000 (12:01 +0100)
Both the sed (\1) and perl ($1) subpatterns are accepted. Only 9
subpatterns are taken into account.

sarg.conf
url.c
util.c

diff --git a/sarg.conf b/sarg.conf
index c3b44a1b50bfea985cdf0e98d945c24cdd945b78..30a0e457d7a13c89a42c214e237b6682ea39aef1 100644 (file)
--- a/sarg.conf
+++ b/sarg.conf
 #      IPv6 addresses.
 #      Regular expressions can also be used if sarg was compiled with libpcre.
 #      A regular expression is formated as re:/regexp/ alias
+#      The regexp is a perl regular expression (see man perlre).
+#      Subpatterns are allowed in the alias. Sarg recognizes sed (\1) or perl ($1)
+#      subpatterns. Only 9 subpatterns are allowed in the replacement string.
 #
 #      Example:
 #      *.gstatic.com
 #      *.mail.live.com
 #      65.52.00.00/14 *.mail.live.com
 #      re:/\.dropbox\.com(:443)?/ dropbox
+#      re:/([\w-]+)\.(\w*[a-zA-Z]\w*)(?::\d+)?$/\1.\2
 #hostalias /usr/local/sarg/hostalias
diff --git a/url.c b/url.c
index 668e0635a77c4511249fe39296a05b8ec6cf59d2..ec5b436306c3a1c07d78a280647524168ff611ff 100644 (file)
--- a/url.c
+++ b/url.c
@@ -96,6 +96,8 @@ struct hostalias_regex
        pcre *Re;
        //! The replacement name.
        const char *Alias;
+       //! \c True if this regular expression contains at least one subpattern
+       bool SubPartern;
 };
 #endif
 
@@ -455,6 +457,7 @@ static int Alias_StoreRegexp(char *buf)
        char *Replace;
        int len;
        char *tmp;
+       int i;
        
        // find the pattern
        Delimiter=*buf++;
@@ -503,6 +506,14 @@ static int Alias_StoreRegexp(char *buf)
        tmp[len+1]='\0';
        new_alias->Alias=tmp;
        
+       new_alias->SubPartern=false;
+       for (i=1 ; tmp[i] ; i++)
+               // both the sed \1 and the perl $1 replacement operators are accepted
+               if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit(tmp[i+1])) {
+                       new_alias->SubPartern=true;
+                       break;
+               }
+       
        // chain it
        prev_alias=&FirstAliasRe;
        for (alias=FirstAliasRe ; alias ; alias=alias->Next)
@@ -755,20 +766,64 @@ static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
 /*!
 Replace the host name by its alias if it is in our list.
 
-\param url The host name.
+\param url_ptr A pointer to the host name to match. It is replaced
+by a pointer to the alias if a match is found.
 
-\return The pointer to the host name or its alias.
+\return \c True if a match is found or \c false if it failed.
+
+\warning The function is not thread safe as it may return a static
+internal buffer.
 */
-static const char *alias_url_regex(const char *url)
+static bool alias_url_regex(const char **url_ptr)
 {
        struct hostalias_regex *alias;
+       int nmatches;
+       const char *url;
+       int url_len;
+       int ovector[30];//size must be a multiple of 3
+       static char Replacement[1024];
+       const char *str;
+       int i;
+       int sub;
+       int repl_idx;
 
+       url=*url_ptr;
+       url_len=strlen(url);
        for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
-               if (pcre_exec(alias->Re,NULL,url,strlen(url),0,0,NULL,0)==0) {
-                       return(alias->Alias);
+               nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0]));
+               if (nmatches>=0) {
+                       if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3*2; //only 2/3 of the vector is used by pcre_exec
+                       if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace
+                               *url_ptr=alias->Alias;
+                       } else {
+                               repl_idx=0;
+                               str=alias->Alias;
+                               for (i=0 ; str[i] ; i++) {
+                                       // both the sed \1 and the perl $1 replacement operators are accepted
+                                       if ((str[i]=='\\' || str[i]=='$') && isdigit(str[i+1])) {
+                                               sub=str[++i]-'0';
+                                               if (sub>=1 && sub<=nmatches) {
+                                                       /*
+                                                        * ovector[sub] is the start position of the match.
+                                                        * ovector[sub+1] is the end position of the match.
+                                                        */
+                                                       sub<<=1;
+                                                       if (repl_idx+ovector[sub+1]-ovector[sub]>=sizeof(Replacement)-1) break;
+                                                       memcpy(Replacement+repl_idx,url+ovector[sub],ovector[sub+1]-ovector[sub]);
+                                                       repl_idx+=ovector[sub+1]-ovector[sub];
+                                                       continue;
+                                               }
+                                       }
+                                       if (repl_idx>=sizeof(Replacement)-1) break;
+                                       Replacement[repl_idx++]=str[i];
+                               }
+                               Replacement[repl_idx]='\0';
+                               *url_ptr=Replacement;
+                       }
+                       return(true);
                }
        }
-       return(url);
+       return(false);
 }
 #endif
 
@@ -807,7 +862,6 @@ const char *process_url(char *url,bool full_url)
        char *str;
        const char *start;
        int type;
-       const char *address;
        unsigned char ipv4[4];
        unsigned short int ipv6[8];
        const char *next;
@@ -818,11 +872,10 @@ const char *process_url(char *url,bool full_url)
                *str='\0';
 #ifdef USE_PCRE
                if (FirstAliasRe) {
-                       address=alias_url_regex(start);
-                       if (address!=start) return(address);
+                       if (alias_url_regex(&start)) return(start);
                }
 #endif
-               type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
+               type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next);
                if (type==1) {
                        if (FirstAliasName)
                                start=alias_url_name(start,next);
diff --git a/util.c b/util.c
index 32f076c49b12f02e9f232631e7630d8eb2137b8f..1891b755b75c17ca8bc6996716403516443d05a4 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1857,6 +1857,7 @@ void unlinkdir(const char *dir,int contentonly)
 
   \param buf The buffer to parse.
   \param text A pointer to set to the beginning of the string pattern. No terminating zero is inserted.
+              The pointer may be NULL.
   \param ipv4 A 4 bytes buffer to store the bytes of the IPv4 address.
   \param ipv6 A 8 short integers buffer to store the values of the IPv6 address.
   \param nbits The number of prefix bits for an IP address.
@@ -1948,8 +1949,10 @@ int extract_address_mask(const char *buf,const char **text,unsigned char *ipv4,u
                        addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
        }
        if (!ip_size) {
-               *text=buf;
-               if (bracket) (*text)--;
+               if (text) {
+                       *text=buf;
+                       if (bracket) (*text)--;
+               }
                while ((unsigned char)buf[i]>' ') i++;
                if (next) *next=buf+i;
                return(1);