From: Frédéric Marchal Date: Sat, 10 Mar 2012 11:01:25 +0000 (+0100) Subject: Accept subpatterns in the host alias X-Git-Tag: v2.3.3-pre1~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a16cb22a92d21fa6a7008f83453e404d20caf608;p=thirdparty%2Fsarg.git Accept subpatterns in the host alias Both the sed (\1) and perl ($1) subpatterns are accepted. Only 9 subpatterns are taken into account. --- diff --git a/sarg.conf b/sarg.conf index c3b44a1..30a0e45 100644 --- a/sarg.conf +++ b/sarg.conf @@ -753,6 +753,9 @@ # IPv6 addresses. # Regular expressions can also be used if sarg was compiled with libpcre. # A regular expression is formated as re:/regexp/ alias +# The regexp is a perl regular expression (see man perlre). +# Subpatterns are allowed in the alias. Sarg recognizes sed (\1) or perl ($1) +# subpatterns. Only 9 subpatterns are allowed in the replacement string. # # Example: # *.gstatic.com @@ -763,4 +766,5 @@ # *.mail.live.com # 65.52.00.00/14 *.mail.live.com # re:/\.dropbox\.com(:443)?/ dropbox +# re:/([\w-]+)\.(\w*[a-zA-Z]\w*)(?::\d+)?$/\1.\2 #hostalias /usr/local/sarg/hostalias diff --git a/url.c b/url.c index 668e063..ec5b436 100644 --- a/url.c +++ b/url.c @@ -96,6 +96,8 @@ struct hostalias_regex pcre *Re; //! The replacement name. const char *Alias; + //! \c True if this regular expression contains at least one subpattern + bool SubPartern; }; #endif @@ -455,6 +457,7 @@ static int Alias_StoreRegexp(char *buf) char *Replace; int len; char *tmp; + int i; // find the pattern Delimiter=*buf++; @@ -503,6 +506,14 @@ static int Alias_StoreRegexp(char *buf) tmp[len+1]='\0'; new_alias->Alias=tmp; + new_alias->SubPartern=false; + for (i=1 ; tmp[i] ; i++) + // both the sed \1 and the perl $1 replacement operators are accepted + if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit(tmp[i+1])) { + new_alias->SubPartern=true; + break; + } + // chain it prev_alias=&FirstAliasRe; for (alias=FirstAliasRe ; alias ; alias=alias->Next) @@ -755,20 +766,64 @@ static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6) /*! Replace the host name by its alias if it is in our list. -\param url The host name. +\param url_ptr A pointer to the host name to match. It is replaced +by a pointer to the alias if a match is found. -\return The pointer to the host name or its alias. +\return \c True if a match is found or \c false if it failed. + +\warning The function is not thread safe as it may return a static +internal buffer. */ -static const char *alias_url_regex(const char *url) +static bool alias_url_regex(const char **url_ptr) { struct hostalias_regex *alias; + int nmatches; + const char *url; + int url_len; + int ovector[30];//size must be a multiple of 3 + static char Replacement[1024]; + const char *str; + int i; + int sub; + int repl_idx; + url=*url_ptr; + url_len=strlen(url); for (alias=FirstAliasRe ; alias ; alias=alias->Next) { - if (pcre_exec(alias->Re,NULL,url,strlen(url),0,0,NULL,0)==0) { - return(alias->Alias); + nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0])); + if (nmatches>=0) { + if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3*2; //only 2/3 of the vector is used by pcre_exec + if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace + *url_ptr=alias->Alias; + } else { + repl_idx=0; + str=alias->Alias; + for (i=0 ; str[i] ; i++) { + // both the sed \1 and the perl $1 replacement operators are accepted + if ((str[i]=='\\' || str[i]=='$') && isdigit(str[i+1])) { + sub=str[++i]-'0'; + if (sub>=1 && sub<=nmatches) { + /* + * ovector[sub] is the start position of the match. + * ovector[sub+1] is the end position of the match. + */ + sub<<=1; + if (repl_idx+ovector[sub+1]-ovector[sub]>=sizeof(Replacement)-1) break; + memcpy(Replacement+repl_idx,url+ovector[sub],ovector[sub+1]-ovector[sub]); + repl_idx+=ovector[sub+1]-ovector[sub]; + continue; + } + } + if (repl_idx>=sizeof(Replacement)-1) break; + Replacement[repl_idx++]=str[i]; + } + Replacement[repl_idx]='\0'; + *url_ptr=Replacement; + } + return(true); } } - return(url); + return(false); } #endif @@ -807,7 +862,6 @@ const char *process_url(char *url,bool full_url) char *str; const char *start; int type; - const char *address; unsigned char ipv4[4]; unsigned short int ipv6[8]; const char *next; @@ -818,11 +872,10 @@ const char *process_url(char *url,bool full_url) *str='\0'; #ifdef USE_PCRE if (FirstAliasRe) { - address=alias_url_regex(start); - if (address!=start) return(address); + if (alias_url_regex(&start)) return(start); } #endif - type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next); + type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next); if (type==1) { if (FirstAliasName) start=alias_url_name(start,next); diff --git a/util.c b/util.c index 32f076c..1891b75 100644 --- a/util.c +++ b/util.c @@ -1857,6 +1857,7 @@ void unlinkdir(const char *dir,int contentonly) \param buf The buffer to parse. \param text A pointer to set to the beginning of the string pattern. No terminating zero is inserted. + The pointer may be NULL. \param ipv4 A 4 bytes buffer to store the bytes of the IPv4 address. \param ipv6 A 8 short integers buffer to store the values of the IPv6 address. \param nbits The number of prefix bits for an IP address. @@ -1948,8 +1949,10 @@ int extract_address_mask(const char *buf,const char **text,unsigned char *ipv4,u addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU); } if (!ip_size) { - *text=buf; - if (bracket) (*text)--; + if (text) { + *text=buf; + if (bracket) (*text)--; + } while ((unsigned char)buf[i]>' ') i++; if (next) *next=buf+i; return(1);