From 6e24f22245f28006cd236d1e43c5f16001a9852c Mon Sep 17 00:00:00 2001
From: =?utf8?q?Fr=C3=A9d=C3=A9ric=20Marchal?=
Date: Sun, 12 Feb 2012 19:52:03 +0100
Subject: [PATCH] Add support for regular expressions in aliasing the hosts

If libpcre is found on the system, a regular expression can be defined in
the hostalias file to replace the matching host names in the report.

A regular expression is formatted in the host alias file as

re:/regexp/ alias

The alias is mandatory. The regexp may be delimited by another character.
---
 CMakeLists.txt      |   11 ++
 configure.in        |   22 ++++
 include/config.h.in |    1 +
 sarg.conf           |    3 +
 url.c               |  285 ++++++++++++++++++++++++++++++++++++--------
 5 files changed, 276 insertions(+), 46 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 15a1657..a541a3c 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -231,6 +231,17 @@ IF(HAVE_LDAP_H)
    ENDIF(LDAP_LIBRARY)
 ENDIF(HAVE_LDAP_H)
 
+# Find pcre
+CHECK_INCLUDE_FILE(pcre.h HAVE_PCRE_H)
+IF(HAVE_PCRE_H)
+   FIND_LIBRARY(PCRE_LIBRARY NAMES pcre DOC "The pcre library")
+   IF(PCRE_LIBRARY)
+      TARGET_LINK_LIBRARIES(sarg ${PCRE_LIBRARY})
+      SET(HAVE_PCRE PCRE_LIBRARY CACHE PATH DOC "True if pcre was found")
+      SET(HAVE_PCRE CACHE BOOL DOC "True if must use pcre")
+   ENDIF(PCRE_LIBRARY)
+ENDIF(HAVE_PCRE_H)
+
 # Support for large files
 OPTION(ENABLE_LARGEFILE "Enable the usage of large files" ON)
 IF(ENABLE_LARGEFILE)
diff --git a/configure.in b/configure.in
index 02b13ac..5e938d4 100644
--- a/configure.in
+++ b/configure.in
@@ -126,6 +126,22 @@ else
 	AC_MSG_NOTICE([Not building with iconv as requested on the configuration command line])
 fi
 
+# Build with pcre
+AC_ARG_WITH([pcre],
+AS_HELP_STRING([--with-pcre],[Compile with support for the Perl Compatible Regular Expressions library]),
+[],[with_pcre=check])
+if ( test "x$with_pcre" != "xno" ) ; then
+	AC_CHECK_HEADERS(pcre.h)
+	if ( test "x$ac_cv_header_pcre_h" == "xyes" ) ; then
+		LDFLAGS="$LDFLAGS $(pcre-config --libs)"
+		CFLAGS="$CFLAGS $(pcre-config --cflags)"
+	else
+		pcre_status="not found"
+	fi
+else
+	pcre_status="disabled"
+fi
+
 dnl Checks for typedefs, structures, and compiler characteristics.
 AC_C_CONST
 AC_STRUCT_TM
@@ -300,3 +316,9 @@ if ( test "x$gd_status" == "xdisabled" ) ; then
 elif ( test "x$gd_status" == "xnot found" ) ; then
 	AC_MSG_NOTICE([gd.h was not found so the graphs won't be available in the report])
 fi
+
+if ( test "x$pcre_status" == "xdisabled" ) ; then
+	AC_MSG_NOTICE([Not building with pcre as requested on the configuration command line])
+elif ( test "x$pcre_status" == "xnot found" ) ; then
+	AC_MSG_NOTICE([pcre.h was not found so the regexp won't be available in the hostalias])
+fi
diff --git a/include/config.h.in b/include/config.h.in
index bac4370..c042e9c 100644
--- a/include/config.h.in
+++ b/include/config.h.in
@@ -49,6 +49,7 @@
 #cmakedefine HAVE_GETOPT_H
 #cmakedefine HAVE_WS2TCPIP_H
 #cmakedefine HAVE_FCNTL_H
+#cmakedefine HAVE_PCRE_H
 
 #cmakedefine IBERTY_LIB
 
diff --git a/sarg.conf b/sarg.conf
index 74caf33..e93441d 100644
--- a/sarg.conf
+++ b/sarg.conf
@@ -741,6 +741,8 @@
 # reports.
 # IP addresses are supported and accept the CIDR notation both for IPv4 and
 # IPv6 addresses.
+# Regular expressions can also be used if sarg was compiled with libpcre.
+# A regular expression is formatted as re:/regexp/ alias
 #
 # Example:
 # *.gstatic.com
@@ -750,4 +752,5 @@
 # *.freeav.net antivirus:freeav
 # *.mail.live.com
 # 65.52.00.00/14 *.mail.live.com
+# re:/\.dropbox\.com(:443)?/ dropbox
 #hostalias /usr/local/sarg/hostalias
diff --git a/url.c b/url.c
index 6e828f9..794dfed 100644
--- a/url.c
+++ b/url.c
@@ -26,6 +26,10 @@
 
 #include "include/conf.h"
 #include "include/defs.h"
+#ifdef HAVE_PCRE_H
+#include <pcre.h>
+#define USE_PCRE
+#endif
 
 /*!
 A host name and the name to report.
@@ -78,6 +82,21 @@ struct hostalias_ipv6
 	const char *Alias;
 };
 
+#ifdef USE_PCRE
+/*!
+A regular expression.
+*/
+struct hostalias_regex
+{
+	//! The next regular expression to test.
+	struct hostalias_regex *Next;
+	//! The regular expression to match against the host name.
+	pcre *Re;
+	//! The replacement name.
+	const char *Alias;
+};
+#endif
+
 //! The first host name.
 static struct hostalias_name *FirstAliasName=NULL;
 //! The first IPv4 address.
@@ -85,6 +104,11 @@ static struct hostalias_ipv4 *FirstAliasIpv4=NULL;
 //! The first IPv§ address.
 static struct hostalias_ipv6 *FirstAliasIpv6=NULL;
 
+#ifdef USE_PCRE
+//! The first regular expression.
+static struct hostalias_regex *FirstAliasRe=NULL;
+#endif
+
 /*!
 Store a name to alias.
 
@@ -410,22 +434,148 @@ static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
 	return(1);
 }
 
+#ifdef USE_PCRE
 /*!
-Read the file containing the host names to alias in the report.
+Store a regular expression to match the alias.
 
-\param Filename The name of the file.
+\param buf The text following the "re:" prefix. It is modified in place.
+
+\retval 1 Alias added.
+\retval 0 Ignore the line.
+\retval -1 Error.
 */
-void read_hostalias(const char *Filename)
+static int Alias_StoreRegexp(char *buf)
+{
+	char Delimiter;
+	char *End;
+	struct hostalias_regex *alias;
+	struct hostalias_regex *new_alias;
+	struct hostalias_regex **prev_alias;
+	const char *PcreError;
+	int ErrorOffset;
+	char *Replace;
+	int len;
+	char *tmp;
+
+	// find the pattern
+	Delimiter=*buf;
+	if (Delimiter=='\0') {
+		// nothing follows the prefix: don't step over the end of the string
+		debuga(_("Unterminated regular expression\n"));
+		return(-1);
+	}
+	buf++;
+	for (End=buf ; *End && *End!=Delimiter ; End++) {
+		if (*End=='\\') {
+			if (End[1]=='\0') {
+				debuga(_("Invalid NUL character found in regular expression\n"));
+				return(-1);
+			}
+			End++; //ignore the escaped character
+		}
+	}
+	if (*End!=Delimiter) {
+		debuga(_("Unterminated regular expression\n"));
+		return(-1);
+	}
+	*End++='\0';
+
+	// find the alias
+	for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
+	for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
+	*End='\0';
+	if (*Replace=='\0') {
+		debuga(_("Missing alias for the regular expression\n"));
+		return(-1);
+	}
+
+	// store it
+	new_alias=malloc(sizeof(*new_alias));
+	if (!new_alias) {
+		debuga(_("Not enough memory to store the host name aliasing directives\n"));
+		return(-1);
+	}
+	new_alias->Next=NULL;
+	new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
+	if (new_alias->Re==NULL) {
+		debuga(_("Failed to compile the regular expression: %s\n"),PcreError);
+		free(new_alias);
+		return(-1);
+	}
+	len=strlen(Replace);
+	tmp=malloc(len+2);
+	if (!tmp) {
+		debuga(_("Not enough memory to store the host name aliasing directives\n"));
+		pcre_free(new_alias->Re);
+		free(new_alias);
+		return(-1);
+	}
+	tmp[0]=ALIAS_PREFIX;
+	memcpy(tmp+1,Replace,len);
+	tmp[len+1]='\0';
+	new_alias->Alias=tmp;
+
+	// chain it
+	prev_alias=&FirstAliasRe;
+	for (alias=FirstAliasRe ; alias ; alias=alias->Next)
+		prev_alias=&alias->Next;
+	*prev_alias=new_alias;
+
+	return(1);
+}
+#endif
+
+/*!
+Store an alias in the corresponding list.
+
+\param buf The string to parse and store.
+
+\retval 0 No error.
+\retval -1 Error in file.
+*/
+static int Alias_Store(char *buf)
 {
-	FILE *fi;
-	longline line;
-	char *buf;
 	int type;
 	const char *name;
 	unsigned char ipv4[4];
 	unsigned short int ipv6[8];
 	int nbits;
 	const char *next;
+	int Error=0; // remains 0 when the line is of a type that stores nothing
+
+#ifdef USE_PCRE
+	if (strncasecmp(buf,"re:",3)==0) {
+		if (Alias_StoreRegexp(buf+3)<0)
+			return(-1);
+		return(0);
+	}
+#endif
+	type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
+	if (type<0) {
+		return(-1);
+	}
+
+	if (type==1) {
+		Error=Alias_StoreName(name,next);
+	} else if (type==2) {
+		Error=Alias_StoreIpv4(ipv4,nbits,next);
+	} else if (type==3) {
+		Error=Alias_StoreIpv6(ipv6,nbits,next);
+	}
+	if (Error<0) return(-1);
+	return(0);
+}
+
+/*!
+Read the file containing the host names to alias in the report.
+
+\param Filename The name of the file.
+*/
+void read_hostalias(const char *Filename)
+{
+	FILE *fi;
+	longline line;
+	char *buf;
 
 	if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
 	fi=fopen(Filename,"rt");
@@ -440,19 +590,10 @@ void read_hostalias(const char *Filename)
 	}
 
 	while ((buf=longline_read(fi,line)) != NULL) {
-		type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
-		if (type<0) {
+		if (Alias_Store(buf)<0) {
 			debuga(_("While reading \"%s\"\n"),Filename);
 			exit(EXIT_FAILURE);
 		}
-
-		if (type==1) {
-			Alias_StoreName(name,next);
-		} else if (type==2) {
-			Alias_StoreIpv4(ipv4,nbits,next);
-		} else if (type==3) {
-			Alias_StoreIpv6(ipv6,nbits,next);
-		}
 	}
 
 	longline_destroy(&line);
@@ -487,33 +628,55 @@ Free the memory allocated by read_hostalias().
 */
 void free_hostalias(void)
 {
-	struct hostalias_name *alias1;
-	struct hostalias_name *next1;
-	struct hostalias_ipv4 *alias4;
-	struct hostalias_ipv4 *next4;
-	struct hostalias_ipv6 *alias6;
-	struct hostalias_ipv6 *next6;
-
-	for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
-		next1=alias1->Next;
-		if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
-		if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
-		free((void *)alias1->Alias);
-		free(alias1);
-	}
-	FirstAliasName=NULL;
-	for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
-		next4=alias4->Next;
-		free((void *)alias4->Alias);
-		free(alias4);
-	}
-	FirstAliasIpv4=NULL;
-	for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
-		next6=alias6->Next;
-		free((void *)alias6->Alias);
-		free(alias6);
-	}
-	FirstAliasIpv6=NULL;
+	{
+		struct hostalias_name *alias1;
+		struct hostalias_name *next1;
+
+		for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
+			next1=alias1->Next;
+			if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
+			if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
+			free((void *)alias1->Alias);
+			free(alias1);
+		}
+		FirstAliasName=NULL;
+	}
+	{
+		struct hostalias_ipv4 *alias4;
+		struct hostalias_ipv4 *next4;
+
+		for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
+			next4=alias4->Next;
+			free((void *)alias4->Alias);
+			free(alias4);
+		}
+		FirstAliasIpv4=NULL;
+	}
+	{
+		struct hostalias_ipv6 *alias6;
+		struct hostalias_ipv6 *next6;
+
+		for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
+			next6=alias6->Next;
+			free((void *)alias6->Alias);
+			free(alias6);
+		}
+		FirstAliasIpv6=NULL;
+	}
+#ifdef USE_PCRE
+	{
+		struct hostalias_regex *alias;
+		struct hostalias_regex *next;
+
+		for (alias=FirstAliasRe ; alias ; alias=next) {
+			next=alias->Next;
+			pcre_free(alias->Re);
+			free((void *)alias->Alias);
+			free(alias);
+		}
+		FirstAliasRe=NULL;
+	}
+#endif
 }
 
 /*!
@@ -523,7 +686,7 @@ Replace the host name by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_name(const char *url,const char *next)
+static const char *alias_url_name(const char *url,const char *next)
 {
 	struct hostalias_name *alias;
 	int len;
@@ -557,7 +720,7 @@ Replace the IPv4 address by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
+static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
 {
 	struct hostalias_ipv4 *alias;
 	int len;
@@ -579,7 +742,7 @@ Replace the IPv6 address by its alias if it is in our list.
 
 \return The pointer to the host name or its alias.
 */
-const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
+static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
 {
 	struct hostalias_ipv6 *alias;
 	int len;
@@ -597,6 +760,30 @@ const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
 	return(url);
 }
 
+#ifdef USE_PCRE
+/*!
+Replace the host name by the alias of the first regular expression it matches.
+
+\param url The host name.
+
+\return The pointer to the host name or its alias.
+*/
+static const char *alias_url_regex(const char *url)
+{
+	struct hostalias_regex *alias;
+	int len;
+
+	len=strlen(url);
+	for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
+		// no output vector is passed so any non-negative result is a match
+		if (pcre_exec(alias->Re,NULL,url,len,0,0,NULL,0)>=0) {
+			return(alias->Alias);
+		}
+	}
+	return(url);
+}
+#endif
+
 /*!
 Find the beginning of the URL beyond the scheme://
 
@@ -641,6 +828,12 @@ const char *process_url(char *url,bool full_url)
 	if (!full_url) {
 		for (str=(char *)start ; *str && *str!='/' && *str!='?' ; str++);
 		*str='\0';
+#ifdef USE_PCRE
+		if (FirstAliasRe) {
+			address=alias_url_regex(start);
+			if (address!=start) return(address);
+		}
+#endif
 		type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
 		if (type==1) {
 			if (FirstAliasName)
-- 
2.47.2