]> git.ipfire.org Git - thirdparty/sarg.git/commitdiff
Add support for regular expressions in aliasing the hosts
authorFrédéric Marchal <fmarchal@users.sourceforge.net>
Sun, 12 Feb 2012 18:52:03 +0000 (19:52 +0100)
committerFrédéric Marchal <fmarchal@users.sourceforge.net>
Sun, 12 Feb 2012 18:52:03 +0000 (19:52 +0100)
If libpcre is found on the system, a regular expression can be defined
in the hostalias file to replace the matching host names in the report.

A regular expression is formatted in the host alias file as
re:/regexp/ alias

The alias is mandatory. The regexp may be delimited by another character.

CMakeLists.txt
configure.in
include/config.h.in
sarg.conf
url.c

index 15a16571988870cbb07dcc319def4387c924e1b6..a541a3cfb78e65bf372bfa8cb8f05e97e6bf5093 100755 (executable)
@@ -231,6 +231,17 @@ IF(HAVE_LDAP_H)
    ENDIF(LDAP_LIBRARY)
 ENDIF(HAVE_LDAP_H)
 
+# Find pcre (header and library)
+CHECK_INCLUDE_FILE(pcre.h HAVE_PCRE_H)
+IF(HAVE_PCRE_H)
+   FIND_LIBRARY(PCRE_LIBRARY NAMES pcre DOC "The pcre library")
+   IF(PCRE_LIBRARY)
+      TARGET_LINK_LIBRARIES(sarg ${PCRE_LIBRARY})
+      # The cache form is SET(<variable> <value> CACHE <type> <docstring>):
+      # a value is required and there is no DOC keyword.
+      SET(HAVE_PCRE ON CACHE BOOL "True if must use pcre")
+   ENDIF(PCRE_LIBRARY)
+ENDIF(HAVE_PCRE_H)
+
 # Support for large files
 OPTION(ENABLE_LARGEFILE "Enable the usage of large files" ON)
 IF(ENABLE_LARGEFILE)
index 02b13ac113a47eff1aebd833b8fdd644651cfca2..5e938d456d5aea15852ed611439632e7f4e26150 100644 (file)
@@ -126,6 +126,22 @@ else
        AC_MSG_NOTICE([Not building with iconv as requested on the configuration command line])
 fi
 
+# Build with pcre
+# pcre_status is left empty when pcre is usable, otherwise it is set to
+# "not found" or "disabled".
+AC_ARG_WITH([pcre],
+AS_HELP_STRING([--with-pcre],[Compile with support for the Perl Compatible Regular Expressions library]),
+[],[with_pcre=check])
+if ( test "x$with_pcre" != "xno" ) ; then
+       AC_CHECK_HEADERS(pcre.h)
+       if ( test "x$ac_cv_header_pcre_h" == "xyes" ) ; then
+               # Library flags (-l...) go in LIBS, not in LDFLAGS (autoconf convention).
+               LIBS="$LIBS $(pcre-config --libs)"
+               CFLAGS="$CFLAGS $(pcre-config --cflags)"
+       else
+               pcre_status="not found"
+       fi
+else
+       pcre_status="disabled"
+fi
+
 dnl Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
 AC_STRUCT_TM
@@ -300,3 +316,9 @@ if ( test "x$gd_status" == "xdisabled" ) ; then
 elif ( test "x$gd_status" == "xnot found" ) ; then
        AC_MSG_NOTICE([gd.h was not found so the graphs won't be available in the report])
 fi
+
+# Tell the user why the regular expressions are not available in the hostalias file.
+if ( test "x$pcre_status" == "xdisabled" ) ; then
+       AC_MSG_NOTICE([Not building with pcre as requested on the configuration command line])
+elif ( test "x$pcre_status" == "xnot found" ) ; then
+       AC_MSG_NOTICE([pcre.h was not found so the regexp won't be available in the hostalias])
+fi
index bac43704b58159e5ebff130e0c6eff988b0ba8b1..c042e9c2ba6fb091b1c878b488a4e4b8aa2fd080 100644 (file)
@@ -49,6 +49,7 @@
 #cmakedefine HAVE_GETOPT_H
 #cmakedefine HAVE_WS2TCPIP_H
 #cmakedefine HAVE_FCNTL_H
+#cmakedefine HAVE_PCRE_H
 
 #cmakedefine IBERTY_LIB
 
index 74caf33a7d579262a874f5b240c831a4ff1a25c6..e93441da70cef2a5e8d89d56ab60055867010fe9 100644 (file)
--- a/sarg.conf
+++ b/sarg.conf
 #      reports.
 #      IP addresses are supported and accept the CIDR notation both for IPv4 and
 #      IPv6 addresses.
+#      Regular expressions can also be used if sarg was compiled with libpcre.
+#      A regular expression is formatted as re:/regexp/ alias
 #
 #      Example:
 #      *.gstatic.com
 #      *.freeav.net antivirus:freeav
 #      *.mail.live.com
 #      65.52.00.00/14 *.mail.live.com
+#      re:/\.dropbox\.com(:443)?/ dropbox
 #hostalias /usr/local/sarg/hostalias
diff --git a/url.c b/url.c
index 6e828f9773291c29698e31ad8d5bceee7aef345d..794dfed253736d143842c91398658e238ddcdc7c 100644 (file)
--- a/url.c
+++ b/url.c
 
 #include "include/conf.h"
 #include "include/defs.h"
+// The regular expressions are optional: they are compiled in only when
+// pcre.h was found. Without it, the code guarded by USE_PCRE is left out.
+#ifdef HAVE_PCRE_H
+#include <pcre.h>
+#define USE_PCRE
+#endif
 
 /*!
 A host name and the name to report.
@@ -78,6 +84,21 @@ struct hostalias_ipv6
        const char *Alias;
 };
 
+#ifdef USE_PCRE
+/*!
+A regular expression to match against the host names and the alias to
+report in place of the matching hosts.
+
+The entries are chained in a singly linked list.
+*/
+struct hostalias_regex
+{
+       //! The next regular expression to test or NULL if it is the last one.
+       struct hostalias_regex *Next;
+       //! The compiled regular expression to match against the host name.
+       pcre *Re;
+       //! The replacement name.
+       const char *Alias;
+};
+#endif
+
 //! The first host name.
 static struct hostalias_name *FirstAliasName=NULL;
 //! The first IPv4 address.
@@ -85,6 +106,10 @@ static struct hostalias_ipv4 *FirstAliasIpv4=NULL;
 //! The first IPv6 address.
 static struct hostalias_ipv6 *FirstAliasIpv6=NULL;
 
+#ifdef USE_PCRE
+//! The first regular expression.
+static struct hostalias_regex *FirstAliasRe=NULL;
+#endif
+
 /*!
   Store a name to alias.
 
@@ -410,22 +435,135 @@ static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
        return(1);
 }
 
+#ifdef USE_PCRE
 /*!
-Read the file containing the host names to alias in the report.
+Store a regular expression to match the alias.
 
-\param Filename The name of the file.
+\retval 1 Alias added.
+\retval 0 Ignore the line.
+\retval -1 Error.
 */
-void read_hostalias(const char *Filename)
+static int Alias_StoreRegexp(char *buf)
+{
+       // buf points just after the "re:" prefix. It is modified in place to
+       // terminate the pattern and the alias. The alias is the first word
+       // after the closing delimiter; an empty alias is not rejected here.
+       char Delimiter;
+       char *End;
+       struct hostalias_regex *new_alias;
+       struct hostalias_regex **prev_alias;
+       const char *PcreError;
+       int ErrorOffset;
+       char *Replace;
+       size_t len;
+       char *tmp;
+
+       // The first character is the delimiter enclosing the pattern. An empty
+       // string has no delimiter and must not be scanned beyond its end.
+       Delimiter=*buf;
+       if (Delimiter=='\0') {
+               debuga(_("Unterminated regular expression\n"));
+               return(-1);
+       }
+       buf++;
+
+       // find the end of the pattern
+       for (End=buf ; *End && *End!=Delimiter ; End++) {
+               if (*End=='\\') {
+                       if (End[1]=='\0') {
+                               debuga(_("Invalid NUL character found in regular expression\n"));
+                               return(-1);
+                       }
+                       End++; //ignore the escaped character
+               }
+       }
+       if (*End!=Delimiter) {
+               debuga(_("Unterminated regular expression\n"));
+               return(-1);
+       }
+       *End++='\0';
+
+       // find the alias: skip the blanks then keep the first word
+       for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
+       for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
+       *End='\0';
+
+       // store it
+       new_alias=malloc(sizeof(*new_alias));
+       if (!new_alias) {
+               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               return(-1);
+       }
+       new_alias->Next=NULL;
+       new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
+       if (new_alias->Re==NULL) {
+               debuga(_("Failed to compile the regular expression: %s\n"),PcreError);
+               free(new_alias);
+               return(-1);
+       }
+
+       // the stored alias is ALIAS_PREFIX followed by the replacement name
+       len=strlen(Replace);
+       tmp=malloc(len+2);
+       if (!tmp) {
+               debuga(_("Not enough memory to store the host name aliasing directives\n"));
+               pcre_free(new_alias->Re);
+               free(new_alias); // the entry is not chained yet so nobody else would free it
+               return(-1);
+       }
+       tmp[0]=ALIAS_PREFIX;
+       memcpy(tmp+1,Replace,len);
+       tmp[len+1]='\0';
+       new_alias->Alias=tmp;
+
+       // chain it at the end of the list to keep the order the aliases were stored in
+       for (prev_alias=&FirstAliasRe ; *prev_alias ; prev_alias=&(*prev_alias)->Next);
+       *prev_alias=new_alias;
+
+       return(1);
+}
+#endif
+
+/*!
+Store an alias in the corresponding list.
+
+\param buf The string to parse and store.
+
+\retval 0 No error.
+\retval -1 Error in file.
+*/
+static int Alias_Store(char *buf)
 {
-       FILE *fi;
-       longline line;
-       char *buf;
        int type;
        const char *name;
        unsigned char ipv4[4];
        unsigned short int ipv6[8];
        int nbits;
        const char *next;
+       int Error;
+       
+#ifdef USE_PCRE
+               if (strncasecmp(buf,"re:",3)==0) {
+                       if (Alias_StoreRegexp(buf+3)<0)
+                               return(-1);
+                       return(0);
+               }
+#endif
+               type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
+               if (type<0) {
+                       return(-1);
+               }
+
+               if (type==1) {
+                       Error=Alias_StoreName(name,next);
+               } else if (type==2) {
+                       Error=Alias_StoreIpv4(ipv4,nbits,next);
+               } else if (type==3) {
+                       Error=Alias_StoreIpv6(ipv6,nbits,next);
+               }
+               if (Error<0) return(-1);
+               return(0);
+}
+
+/*!
+Read the file containing the host names to alias in the report.
+
+\param Filename The name of the file.
+*/
+void read_hostalias(const char *Filename)
+{
+       FILE *fi;
+       longline line;
+       char *buf;
 
        if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
        fi=fopen(Filename,"rt");
@@ -440,19 +578,10 @@ void read_hostalias(const char *Filename)
        }
 
        while ((buf=longline_read(fi,line)) != NULL) {
-               type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
-               if (type<0) {
+               if (Alias_Store(buf)<0) {
                        debuga(_("While reading \"%s\"\n"),Filename);
                        exit(EXIT_FAILURE);
                }
-
-               if (type==1) {
-                       Alias_StoreName(name,next);
-               } else if (type==2) {
-                       Alias_StoreIpv4(ipv4,nbits,next);
-               } else if (type==3) {
-                       Alias_StoreIpv6(ipv6,nbits,next);
-               }
        }
        
        longline_destroy(&line);
@@ -487,33 +616,55 @@ Free the memory allocated by read_hostalias().
 */
 void free_hostalias(void)
 {
-       struct hostalias_name *alias1;
-       struct hostalias_name *next1;
-       struct hostalias_ipv4 *alias4;
-       struct hostalias_ipv4 *next4;
-       struct hostalias_ipv6 *alias6;
-       struct hostalias_ipv6 *next6;
-       
-       for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
-               next1=alias1->Next;
-               if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
-               if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
-               free((void *)alias1->Alias);
-               free(alias1);
-       }
-       FirstAliasName=NULL;
-       for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
-               next4=alias4->Next;
-               free((void *)alias4->Alias);
-               free(alias4);
-       }
-       FirstAliasIpv4=NULL;
-       for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
-               next6=alias6->Next;
-               free((void *)alias6->Alias);
-               free(alias6);
-       }
-       FirstAliasIpv6=NULL;
+       {
+               struct hostalias_name *alias1;
+               struct hostalias_name *next1;
+               
+               for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
+                       next1=alias1->Next;
+                       if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
+                       if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
+                       free((void *)alias1->Alias);
+                       free(alias1);
+               }
+               FirstAliasName=NULL;
+       }
+       {
+               struct hostalias_ipv4 *alias4;
+               struct hostalias_ipv4 *next4;
+               
+               for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
+                       next4=alias4->Next;
+                       free((void *)alias4->Alias);
+                       free(alias4);
+               }
+               FirstAliasIpv4=NULL;
+       }
+       {
+               struct hostalias_ipv6 *alias6;
+               struct hostalias_ipv6 *next6;
+               
+               for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
+                       next6=alias6->Next;
+                       free((void *)alias6->Alias);
+                       free(alias6);
+               }
+               FirstAliasIpv6=NULL;
+       }
+#ifdef USE_PCRE
+       {
+               struct hostalias_regex *alias;
+               struct hostalias_regex *next;
+               
+               for (alias=FirstAliasRe ; alias ; alias=next) {
+                       next=alias->Next;
+                       pcre_free(alias->Re);
+                       free((void *)alias->Alias);
+                       free(alias);
+               }
+               FirstAliasRe=NULL;
+       }
+#endif
 }
 
 /*!
@@ -523,7 +674,7 @@ Replace the host name by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_name(const char *url,const char *next)
+static const char *alias_url_name(const char *url,const char *next)
 {
        struct hostalias_name *alias;
        int len;
@@ -557,7 +708,7 @@ Replace the IPv4 address by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
+static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
 {
        struct hostalias_ipv4 *alias;
        int len;
@@ -579,7 +730,7 @@ Replace the IPv6 address by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
+static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
 {
        struct hostalias_ipv6 *alias;
        int len;
@@ -597,6 +748,27 @@ const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
        return(url);
 }
 
+#ifdef USE_PCRE
+/*!
+Look for the first regular expression matching the host name and
+return the alias stored with it.
+
+\param url The host name.
+
+\return The alias of the first matching regular expression or \a url
+itself if no regular expression matches.
+*/
+static const char *alias_url_regex(const char *url)
+{
+       struct hostalias_regex *entry=FirstAliasRe;
+       const int url_len=strlen(url);
+
+       while (entry) {
+               // no output vector is passed, so only a zero result is taken as a match
+               if (pcre_exec(entry->Re,NULL,url,url_len,0,0,NULL,0)==0)
+                       return(entry->Alias);
+               entry=entry->Next;
+       }
+       return(url);
+}
+#endif
+
 /*!
 Find the beginning of the URL beyond the scheme://
 
@@ -641,6 +813,12 @@ const char *process_url(char *url,bool full_url)
        if (!full_url) {
                for (str=(char *)start ; *str && *str!='/' && *str!='?' ; str++);
                *str='\0';
+#ifdef USE_PCRE
+               if (FirstAliasRe) {
+                       address=alias_url_regex(start);
+                       if (address!=start) return(address);
+               }
+#endif
                type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
                if (type==1) {
                        if (FirstAliasName)