From: Frédéric Marchal Date: Mon, 18 Jan 2010 20:32:24 +0000 (+0000) Subject: Fixed exclude_hosts to exclude subdomains and IPv4 subnets X-Git-Tag: v2_2_7~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=43f18f452ab0b41b5940b4e57a05a9cf69fd9f36;p=thirdparty%2Fsarg.git Fixed exclude_hosts to exclude subdomains and IPv4 subnets --- diff --git a/CMakeLists.txt b/CMakeLists.txt index d32f420..7c16fa7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ PROJECT(sarg C) SET(sarg_VERSION 2) SET(sarg_REVISION 2) SET(sarg_BUILD "7rc1") -SET(sarg_BUILDDATE "Jan-14-2010") +SET(sarg_BUILDDATE "Jan-18-2010") INCLUDE(AddFileDependencies) INCLUDE(CheckIncludeFile) diff --git a/ChangeLog b/ChangeLog index cacc265..f63da96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,6 @@ SARG ChangeLog -Jan-06-2010 Version 2.2.7 +Jan-18-2010 Version 2.2.7 - Extra compile and run time protection (FORTIFY_SOURCE) fixed in configure. - Use tabulations as columns separator in intermediary files to avoid problems when a field of the log contains a space. - Input log file type detection partly rewritten to clearly distinguish which type is processed where. @@ -17,6 +17,7 @@ Jan-06-2010 Version 2.2.7 - Only copy the files when creating the directory with the images to link the reports to. - Directories deleted without using the rm system command. - Index created using an internal sort algorithm instead of a system call. + - Fixed exclude_hosts to exclude subdomains and IPv4 subnets. Jan-06-2010 Version 2.2.6.1 - Remove unnecessary dependency on off_t. diff --git a/documentation/exclude.txt b/documentation/exclude.txt index 9140048..8374da3 100644 --- a/documentation/exclude.txt +++ b/documentation/exclude.txt @@ -5,11 +5,63 @@ +/*!\fn static void store_exclude_ip4(unsigned short int *addr,int mask) +Store an IPv4 address to exclude from the report. A mask may be provided +to exclude a subnet. -/*! 
\fn int vhexclude(const char *excludefile, const char *url) +\param addr The four octets of the IPv4 address. +\param mask The number of leading bits of the address that must match (the prefix length of the subnet). +*/ + + + + +/*! \fn static void store_exclude_url(char *url,int length) +Store a URL to exclude from the report. + +If the URL contains a wildcard (*), only the part of the URL after +the first dot encountered after the wildcard is stored. It serves +to exclude a subdomain. + +\param url The URL to exclude. +\param length The length of the URL in bytes. +*/ + + + + + +/*!\fn static void gethexclude(const char *hexfile, int debug) +Get the list of the hosts to exclude from the report. + +You must call free_exclude() to free the memory allocated by this function. + +\param hexfile The name of the file to read. There is one host per line. Any line +containing a # anywhere in the line is a comment. +\param debug Set to \c true to print debug information. +*/ + + + + + +/*! \fn static void getuexclude(const char *uexfile, int debug) +Get the list of the users to exclude from the report. + +You must call free_exclude() to free the memory allocated by this function. + +\param uexfile The name of the file to read. There is one user per line. Any line +containing a # anywhere in the line is a comment. +\param debug Set to \c true to print debug information. +*/ + + + + + +/*! \fn int vhexclude(const char *url) Tell if the site accessed by the user is excluded from the report. -\param excludefile The list of the excluded sites as loaded by gethexclude(). \param url The URL to check. \retval 1 The site is not excluded. @@ -20,12 +72,30 @@ Tell if the site accessed by the user is excluded from the report. -/*! \fn int vuexclude(const char *excludeuser, const char *user) +/*! \fn int vuexclude(const char *user) Tell if the user is excluded from the report. -\param excludeuser The list of the excluded users as loaded by getuexclude(). \param user The user to check. \retval 1 The user is not excluded. 
\retval 0 The user is excluded. */ + + + + + +/*! \fn int is_indexonly(void) +Tell if the exclusion list of the users mentions the word "indexonly" meaning that +the user only wants to produce the index.html. + +\return \c true if only the index is requested or \c false if normal processing is requested. +*/ + + + + + +/*! \fn void free_exclude(void) +Free the memory allocated by gethexclude() and getuexclude(). +*/ diff --git a/documentation/log.txt b/documentation/log.txt index fd0732f..0d9288a 100644 --- a/documentation/log.txt +++ b/documentation/log.txt @@ -89,25 +89,3 @@ only the first column is kept. but the names are not stored in a way that is usable by that function. */ - - - - -/*!\fn static void gethexclude(const char *hexfile, int debug) -Get the list of the hosts to exclude from the report. - -\param hexfile The name of the file to read. There is one host per line. Any line -containing a # anywhere in the line is a comment. -\param debug Set to \c true to print debug informations. -*/ - - - - -/*! \fn static void getuexclude(const char *uexfile, int debug) -Get the list of the users to exclude from the report. - -\param uexfile The name of the file to read. There is one user per line. Any line -containing a # anywhere in the line is a comment. -\param debug Set to \c true to print debug informations. -*/ \ No newline at end of file diff --git a/exclude.c b/exclude.c index cdba3ac..fd3e950 100644 --- a/exclude.c +++ b/exclude.c @@ -26,72 +26,378 @@ #include "include/conf.h" #include "include/defs.h" -int vhexclude(const char *excludefile, const char *url) +struct hostip4struct { + //! The IP address. + unsigned long int address; + //! The mask to match the address of the URL. + unsigned long int mask; +}; - char *whost; - char *whost_next; - char *whost_dom; - char *str; - char *wurl; - char *wurl_dom; - char *port; +struct hostnamestruct +{ + //! The URL to match without any leading wildcard. + char *url; + //! 
The number of dots in the url if a wildcard is present or -1 if the address is complete (no wildcard) + int ndots; +}; + +static struct hostip4struct *exclude_ip4=NULL; +static int num_exclude_ip4=0; +static struct hostnamestruct *exclude_name=NULL; +static int num_exclude_name=0; +static int ip4allocated=0; +static int nameallocated=0; + +static char *excludeuser=NULL; + +static void store_exclude_ip4(unsigned short int *addr,int mask) +{ + int i; - if((str=strdup(excludefile))==NULL) { - fprintf(stderr, "SARG: %s (%u):\n",text[59], (unsigned int)(strlen(excludefile)+1)); + if (num_exclude_ip4>=ip4allocated) { + struct hostip4struct *temp; + + ip4allocated+=5; + temp=realloc(exclude_ip4,ip4allocated*sizeof(*temp)); + if (temp==NULL) { + fprintf(stderr,"SARG: Not enough memory to store the exlcluded IP addresses\n"); exit(1); + } + exclude_ip4=temp; } - if((wurl=strdup(url))==NULL) { - fprintf(stderr, "SARG: %s (%u):\n",text[59], (unsigned int)(strlen(url)+1)); + exclude_ip4[num_exclude_ip4].address=0UL; + for (i=0 ; i<4 ; i++) + exclude_ip4[num_exclude_ip4].address=(exclude_ip4[num_exclude_ip4].address<<8) | (unsigned char)(addr[i] & 0xFFU); + exclude_ip4[num_exclude_ip4].mask=(0xFFFFFFFFUL << (32-mask)); + num_exclude_ip4++; +} + +static void store_exclude_url(char *url,int length) +{ + int start; + int i; + int ndots, firstdot; + struct hostnamestruct *item; + + start=0; + ndots=-1; + firstdot=0; + for (i=0 ; i=0) + ndots++; + } + if (start>=length || firstdot) return; + if (start>0) { + url+=start; + length-=start; + } + + if (num_exclude_name>=nameallocated) { + struct hostnamestruct *temp; + + nameallocated+=5; + temp=realloc(exclude_name,nameallocated*sizeof(*temp)); + if (temp==NULL) { + fprintf(stderr,"SARG: Not enough memory to store the exlcluded URL\n"); exit(1); + } + exclude_name=temp; + } + + item=exclude_name+num_exclude_name; + num_exclude_name++; + item->url=malloc(length+1); + if (!item->url) { + fprintf(stderr,"SARG: Not enough memory to store 
the excluded URL\n"); + exit(1); + } + strncpy(item->url,url,length); + item->url[length]='\0'; + item->ndots=(ndots>0) ? ndots : -1; +} + + +void gethexclude(const char *hexfile, int debug) +{ + + FILE *fp_ex; + char buf[255]; + int i; + int ip_size; + unsigned int value4, value6; + unsigned short int addr[8]; + int addr_len; + int mask, max_mask; + + if(access(hexfile, R_OK) != 0) { + debuga("Cannot open exclude_hosts file: %s - %s",hexfile,strerror(errno)); + exit(1); } - whost=str; - whost_next=strchr(whost,' '); - if (whost_next) *whost_next++ = '\0'; + if(debug) + debuga("%s: %s",text[67],hexfile); - port=strchr(wurl,':'); - if(port != NULL) { - // remove the port number at the end of the site's address - *port='\0'; + if ((fp_ex = fopen(hexfile, "r")) == NULL) { + fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno)); + exit(1); } - wurl_dom=strchr(wurl,'.'); - while(strcmp(whost,"*END*") != 0) { - if(strcmp(wurl,whost) == 0) { - free(wurl); - free(str); - return(0); + while(fgets(buf,sizeof(buf),fp_ex)!=NULL){ + if(strchr(buf,'#') != NULL) + continue; + fixendofline(buf); + ip_size=0x60 | 0x04; + value4=0U; + value6=0U; + addr_len=0; + for (i=0 ; (unsigned char)buf[i]>' ' && buf[i]!='/' ; i++) { + if (ip_size & 0x04) { + if (isdigit(buf[i])) { + value4=value4*10+(buf[i]-'0'); + if (value4>0xFFU) ip_size&=~0x04; + } else if (buf[i]=='.' 
&& addr_len<4) { + addr[addr_len++]=(unsigned short)(value4 & 0xFFU); + value4=0U; + } else { + ip_size&=~0x04; + } + } + if (ip_size & 0x60) { + if (isdigit(buf[i])) { + value6=(value6<<4)+(buf[i]-'0'); + if (value6>0xFFFFU) ip_size&=~0x60; + } else if (toupper(buf[i])>='A' && toupper(buf[i])<='F') { + value6=(value6<<4)+(toupper(buf[i])-'A'+10); + if (value6>0xFFFFU) ip_size&=~0x60; + } else if (buf[i]==':' && addr_len<8) { + addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU); + value6=0U; + } else { + ip_size&=~0x60; + } + } + } + if (i==0) continue; + if (ip_size & 0x04) { + if (addr_len!=3) + ip_size&=~0x04; + else + addr[addr_len++]=(unsigned short)(value4 & 0xFFU); + } + if (ip_size & 0x60) { + if (addr_len>=8) + ip_size&=~0x60; + else + addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU); } - if(wurl_dom != NULL && strchr(whost,'*') != 0) { - whost_dom=strchr(whost,'.'); - if (whost_dom != NULL && strcmp(wurl_dom,whost_dom) == 0) { - free(wurl); - free(str); - return(0); + if (ip_size) { + max_mask=(ip_size & 0x04) ? 
4*8 : 8*16; + if (buf[i]=='/') { + mask=atoi(buf+i+1); + if (mask<0 || mask>max_mask) mask=max_mask; + } else + mask=max_mask; + if (ip_size & 0x04) + store_exclude_ip4(addr,mask); + else { + fprintf(stderr,"SARG: IPv6 addresses are not supported (found in %s)\n",hexfile); + exit(1); } + } else { + store_exclude_url(buf,i); } - if (whost_next == NULL) break; - whost=whost_next; - whost_next=strchr(whost,' '); - if (whost_next) *whost_next++ = '\0'; } - free(wurl); - free(str); + + fclose(fp_ex); + + return; +} + +int vhexclude(const char *url) +{ + int i, j; + int length; + int ip_size; + unsigned int value4, value6; + unsigned long int addr4; + unsigned short int addr6[8]; + int addr_len; + int dotpos[10]; + int ndots; + + ip_size=0x60 | 0x04; + addr4=0UL; + value4=0U; + value6=0U; + addr_len=0; + for (i=0 ; (unsigned char)url[i]>' ' && url[i]!='/' && url[i]!='?'&& ((ip_size & 0x60)!=0 || url[i]!=':') && ip_size ; i++) { + if (ip_size & 0x04) { + if (isdigit(url[i])) { + value4=value4*10+(url[i]-'0'); + if (value4>0xFFU) ip_size&=~0x04; + } else if (url[i]=='.' && addr_len<4) { + addr_len++; + addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU); + value4=0U; + } else { + ip_size&=~0x04; + } + } + if (ip_size & 0x60) { + if (isdigit(url[i])) { + value6=(value6<<4)+(url[i]-'0'); + if (value6>0xFFFFU) ip_size&=~0x60; + } else if (toupper(url[i])>='A' && toupper(url[i])<='F') { + value6=(value6<<4)+(toupper(url[i])-'A'+10); + if (value6>0xFFFFU) ip_size&=~0x60; + } else if (url[i]==':' && addr_len<8) { + addr6[addr_len++]=(unsigned short)(value6 & 0xFFFFU); + value6=0U; + } else { + ip_size&=~0x60; + } + } + } + if ((ip_size & 0x04) && addr_len==3) { + if (exclude_ip4 == NULL) return(1); + addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU); + for (i=0 ; i' ' && url[length]!=':' && url[length]!='/' && url[length]!='?' 
; length++) + if (url[length]=='.') { + /* + We store the position of each dots of the URL to match it against any + wildcard in the excluded list. The size of dotpos is big enough for the most + ambitious URL but we have a safety mechanism that shift the positions should there be too + many dots in the URL. + */ + if (ndots0) { + for (i=0 ; i0) { + const char *wurl=url; + int len=length; + if (exclude_name[i].ndots<=ndots) { + wurl+=dotpos[ndots-exclude_name[i].ndots]; + len-=dotpos[ndots-exclude_name[i].ndots]; + } + if (strncmp(exclude_name[i].url,wurl,len)==0 && exclude_name[i].url[len]=='\0') return(0); + } else { + if (strncmp(exclude_name[i].url,url,length)==0 && exclude_name[i].url[length]=='\0') return(0); + } + } + } + } + return(1); } -int vuexclude(const char *excludeuser, const char *user) +void getuexclude(const char *uexfile, int debug) +{ + + FILE *fp_ex; + char buf[255]; + long int nreg=0; + + if(debug) + debuga("%s: %s",text[67],uexfile); + + if ((fp_ex = fopen(uexfile, "r")) == NULL) { + fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno)); + exit(1); + } + + fseek(fp_ex, 0, SEEK_END); + nreg = ftell(fp_ex); + if (nreg<0) { + printf("SARG: Cannot get the size of file %s",uexfile); + exit(1); + } + nreg += 11; + fseek(fp_ex, 0, SEEK_SET); + + if((excludeuser=(char *) malloc(nreg))==NULL){ + fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg); + exit(1); + } + + bzero(excludeuser,nreg); + + while(fgets(buf,sizeof(buf),fp_ex)!=NULL){ + if(strchr(buf,'#') != NULL) + continue; + fixendofline(buf); + strcat(excludeuser,buf); + strcat(excludeuser," "); + } + + strcat(excludeuser,"*END* "); + + fclose(fp_ex); + + return; +} + +int vuexclude(const char *user) { const char *wuser; int len; - len=strlen(user); - wuser=excludeuser; - while ((wuser=strstr(wuser,user))!=NULL) { - if (wuser[len]==' ') return(0); - wuser+=len; + if (excludeuser) { + len=strlen(user); + wuser=excludeuser; + while ((wuser=strstr(wuser,user))!=NULL) 
{ + if (wuser[len]==' ') return(0); + wuser+=len; + } } return(1); } + +int is_indexonly(void) +{ + if (excludeuser==NULL) return(0); + return(strstr(excludeuser,"indexonly") != NULL); +} + +void free_exclude(void) +{ + int i; + + if (exclude_ip4) { + free(exclude_ip4); + exclude_ip4=NULL; + } + + if (exclude_name) { + for (i=0 ; id_name); } } + closedir(dirp); if(strcmp(IndexTree,"date") == 0) { make_date_index(); diff --git a/log.c b/log.c index 624bb53..f45dc1c 100644 --- a/log.c +++ b/log.c @@ -30,16 +30,12 @@ char *userfile; -char *excludefile; -char *excludeuser; numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 }; numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 }; static void getusers(const char *pwdfile, int debug); -static void gethexclude(const char *hexfile, int debug); -static void getuexclude(const char *uexfile, int debug); int main(int argc,char *argv[]) { @@ -484,11 +480,7 @@ int main(int argc,char *argv[]) if(hexclude[0] == '\0') strcpy(hexclude,ExcludeHosts); - if(strlen(hexclude) > 0) { - if(access(hexclude, R_OK) != 0) { - debuga("Cannot open exclude_hosts file: %s - %s",hexclude,strerror(errno)); - exit(1); - } + if(hexclude[0] != '\0') { gethexclude(hexclude,debug); fhost++; } @@ -503,7 +495,7 @@ int main(int argc,char *argv[]) indexonly=0; if(fuser) { - if(strstr(excludeuser,"indexonly") != 0) + if(is_indexonly()) indexonly++; } if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++; @@ -1245,8 +1237,8 @@ int main(int argc,char *argv[]) l=1;else l=0; } if(fhost) { -// l=vhexclude(excludefile,ip); - l=vhexclude(excludefile,url); +// l=vhexclude(ip); + l=vhexclude(url); if(!l) { if (debugm) printf("Excluded site: %s\n",url); totregsx++; @@ -1317,7 +1309,7 @@ int main(int argc,char *argv[]) if(l) { if(fuser) { - l=vuexclude(excludeuser,user); + l=vuexclude(user); if(!l) { if (debugm) printf("Excluded user: %s\n",user); totregsx++; @@ -1428,6 +1420,10 @@ int main(int argc,char *argv[]) 
if (fp_Write_User) fclose (fp_Write_User); + free_download(); + free_excludecodes(); + free_exclude(); + if(debug) { int totalcount=0; @@ -1457,14 +1453,8 @@ int main(int argc,char *argv[]) fclose(fp_denied); if(fp_authfail) fclose(fp_authfail); - free_excludecodes(); if(userfile) free(userfile); - if(excludefile) - free(excludefile); - if(excludeuser) - free(excludeuser); - free_download(); unlink(tmp4); unlink(tmp6); unlink(tmp3); @@ -1480,14 +1470,8 @@ int main(int argc,char *argv[]) fclose(fp_denied); if(fp_authfail) fclose(fp_authfail); - free_excludecodes(); if(userfile) free(userfile); - if(excludefile) - free(excludefile); - if(excludeuser) - free(excludeuser); - free_download(); exit(0); } @@ -1576,14 +1560,8 @@ int main(int argc,char *argv[]) unlinkdir(tmp,0); } - free_excludecodes(); if(userfile) free(userfile); - if(excludefile) - free(excludefile); - if(excludeuser) - free(excludeuser); - free_download(); if(debug) debuga("%s",text[21]); @@ -1640,97 +1618,3 @@ static void getusers(const char *pwdfile, int debug) return; } - - -static void gethexclude(const char *hexfile, int debug) -{ - - FILE *fp_ex; - char buf[255]; - long int nreg=0; - - if(debug) - debuga("%s: %s",text[67],hexfile); - - if ((fp_ex = fopen(hexfile, "r")) == NULL) { - fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno)); - exit(1); - } - - fseek(fp_ex, 0, SEEK_END); - nreg = ftell(fp_ex); - if (nreg<0) { - printf("SARG: Cannot get the size of file %s",hexfile); - exit(1); - } - nreg += 11; - fseek(fp_ex, 0, SEEK_SET); - - if((excludefile=(char *) malloc(nreg))==NULL){ - fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg); - exit(1); - } - - bzero(excludefile,nreg); - - while(fgets(buf,sizeof(buf),fp_ex)!=NULL){ - if(strchr(buf,'#') != NULL) - continue; - fixendofline(buf); - strcat(excludefile,buf); - strcat(excludefile," "); - } - - strcat(excludefile,"*END* "); - - fclose(fp_ex); - - return; -} - - -static void getuexclude(const char *uexfile, int 
debug) -{ - - FILE *fp_ex; - char buf[255]; - long int nreg=0; - - if(debug) - debuga("%s: %s",text[67],uexfile); - - if ((fp_ex = fopen(uexfile, "r")) == NULL) { - fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno)); - exit(1); - } - - fseek(fp_ex, 0, SEEK_END); - nreg = ftell(fp_ex); - if (nreg<0) { - printf("SARG: Cannot get the size of file %s",uexfile); - exit(1); - } - nreg += 11; - fseek(fp_ex, 0, SEEK_SET); - - if((excludeuser=(char *) malloc(nreg))==NULL){ - fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg); - exit(1); - } - - bzero(excludeuser,nreg); - - while(fgets(buf,sizeof(buf),fp_ex)!=NULL){ - if(strchr(buf,'#') != NULL) - continue; - fixendofline(buf); - strcat(excludeuser,buf); - strcat(excludeuser," "); - } - - strcat(excludeuser,"*END* "); - - fclose(fp_ex); - - return; -} diff --git a/sarg.conf b/sarg.conf index 25312be..7d7d696 100644 --- a/sarg.conf +++ b/sarg.conf @@ -179,10 +179,10 @@ # TAG: exclude_hosts file # Hosts, domains or subnets will be excluded from reports. # -# Eg.: 192.168.10.10 - exclude ip address only -# 192.168.10.0 - exclude full C class -# s1.acme.foo - exclude hostname only -# acme.foo - exclude full domain name +# Eg.: 192.168.10.10 - exclude ip address only +# 192.168.10.0/24 - exclude full C class +# s1.acme.foo - exclude hostname only +# *.acme.foo - exclude full domain name # #exclude_hosts none