/*
- * AUTHOR: Pedro Lineu Orso pedro.orso@gmail.com
- * 1998, 2010
* SARG Squid Analysis Report Generator http://sarg.sourceforge.net
+ * 1998, 2015
*
* SARG donations:
* please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ * http://sourceforge.net/projects/sarg/forums/forum/363374
* ---------------------------------------------------------------------
*
* This program is free software; you can redistribute it and/or modify
struct hostip4struct
{
- //! The IP address.
- unsigned long int address;
- //! The mask to match the address of the URL.
- unsigned long int mask;
+ //! The IPv4 address packed in the low 32 bits, the first byte of the address being the most significant one.
+ unsigned long int address;
+ //! The mask selecting the prefix bits: a candidate matches when ((address XOR candidate) AND mask) is zero.
+ unsigned long int mask;
+};
+
+struct hostip6struct
+{
+ //! The IPv6 address as eight 16-bit groups, copied in order from the address given to store_exclude_ip6().
+ unsigned short int address[8];
+ //! The number of leading bits of the address that form the prefix to match.
+ int nbits;
};
struct hostnamestruct
{
- //! The URL to match without any leading wildcard.
- char *url;
- //! The number of dots in the url if a wildcard is present or -1 if the address is complete (no wildcard)
- int ndots;
+ //! Heap-allocated copy of the host name to match, without the leading wildcard and the dot that follows it.
+ char *url;
+ //! For a wildcard entry, the dot following the wildcard plus every dot in the stored name; -1 if the entry is a complete host name (no wildcard).
+ int ndots;
};
static struct hostip4struct *exclude_ip4=NULL; //!< Table of the excluded IPv4 networks, grown with realloc().
static int num_exclude_ip4=0; //!< Number of entries in use in exclude_ip4.
+static struct hostip6struct *exclude_ip6=NULL; //!< Table of the excluded IPv6 networks, grown with realloc().
+static int num_exclude_ip6=0; //!< Number of entries in use in exclude_ip6.
static struct hostnamestruct *exclude_name=NULL; //!< Table of the excluded host names, grown with realloc().
static int num_exclude_name=0; //!< Number of entries in use in exclude_name.
static int ip4allocated=0; //!< Number of entries allocated for exclude_ip4.
+static int ip6allocated=0; //!< Number of entries allocated for exclude_ip6.
static int nameallocated=0; //!< Number of entries allocated for exclude_name.
static char *excludeuser=NULL; //!< Excluded users, each followed by one space, terminated by the "*END* " marker.
-static void store_exclude_ip4(unsigned short int *addr,int mask)
-{
- int i;
-
- if (num_exclude_ip4>=ip4allocated) {
- struct hostip4struct *temp;
-
- ip4allocated+=5;
- temp=realloc(exclude_ip4,ip4allocated*sizeof(*temp));
- if (temp==NULL) {
- fprintf(stderr,"SARG: Not enough memory to store the exlcluded IP addresses\n");
- exit(1);
- }
- exclude_ip4=temp;
- }
- exclude_ip4[num_exclude_ip4].address=0UL;
- for (i=0 ; i<4 ; i++)
- exclude_ip4[num_exclude_ip4].address=(exclude_ip4[num_exclude_ip4].address<<8) | (unsigned char)(addr[i] & 0xFFU);
- exclude_ip4[num_exclude_ip4].mask=(0xFFFFFFFFUL << (32-mask));
- num_exclude_ip4++;
-}
+/*!
+ Store an IPv4 address to exclude from the reported URL.
-static void store_exclude_url(char *url,int length)
+
+ The four bytes are packed into one integer, first byte in the most
+ significant position, and stored with the mask selecting the prefix.
+ The table grows by five entries at a time; the program exits if the
+ memory cannot be allocated.
+
+ \param addr The 4 bytes of the address.
+ \param nbits The number of bits to keep in the prefix. Zero matches every
+ address. A value outside 0..32 is treated as 32 (exact match), which is
+ how the previous parser clamped an invalid prefix length.
+ */
+static void store_exclude_ip4(unsigned char *addr,int nbits)
{
- int start;
- int i;
- int ndots, firstdot;
- struct hostnamestruct *item;
-
- start=0;
- ndots=-1;
- firstdot=0;
- for (i=0 ; i<length ; i++)
- if (url[i]=='*') {
- firstdot=1;
- } else if (url[i]=='.') {
- if (firstdot) {
- firstdot=0;
- ndots=1;
- start=i+1;
- } else if (ndots>=0)
- ndots++;
- }
- if (start>=length || firstdot) return;
- if (start>0) {
- url+=start;
- length-=start;
- }
-
- if (num_exclude_name>=nameallocated) {
- struct hostnamestruct *temp;
-
- nameallocated+=5;
- temp=realloc(exclude_name,nameallocated*sizeof(*temp));
- if (temp==NULL) {
- fprintf(stderr,"SARG: Not enough memory to store the exlcluded URL\n");
- exit(1);
- }
- exclude_name=temp;
- }
-
- item=exclude_name+num_exclude_name;
- num_exclude_name++;
- item->url=malloc(length+1);
- if (!item->url) {
- fprintf(stderr,"SARG: Not enough memory to store the excluded URL\n");
- exit(1);
- }
- strncpy(item->url,url,length);
- item->url[length]='\0';
- item->ndots=(ndots>0) ? ndots : -1;
+    unsigned long int address;
+    unsigned long int mask;
+    int i;
+
+    if (num_exclude_ip4>=ip4allocated) {
+        struct hostip4struct *temp;
+
+        ip4allocated+=5;
+        temp=realloc(exclude_ip4,ip4allocated*sizeof(*temp));
+        if (temp==NULL) {
+            debuga(__FILE__,__LINE__,_("Not enough memory to store the exlcluded IP addresses\n"));
+            exit(EXIT_FAILURE);
+        }
+        exclude_ip4=temp;
+    }
+
+    address=0UL;
+    for (i=0 ; i<4 ; i++)
+        address=(address<<8) | (unsigned long int)addr[i];
+
+    /*
+    Shifting by the full width of the type (nbits==0 where unsigned long is
+    32 bits wide) or by a negative count (nbits>32) is undefined behaviour,
+    so the boundaries are handled without shifting.
+    */
+    if (nbits==0)
+        mask=0UL;
+    else if (nbits<0 || nbits>=32)
+        mask=0xFFFFFFFFUL;
+    else
+        mask=(0xFFFFFFFFUL << (32-nbits)) & 0xFFFFFFFFUL;
+
+    exclude_ip4[num_exclude_ip4].address=address;
+    exclude_ip4[num_exclude_ip4].mask=mask;
+    num_exclude_ip4++;
}
+/*!
+ Store an IPv6 address to exclude from the reported URL.
-void gethexclude(const char *hexfile, int debug)
+
+ The table grows by five entries at a time; the program exits if the
+ memory cannot be allocated.
+
+ \param addr The 8 short int of the address.
+ \param nbits The number of bits to keep in the prefix.
+ */
+static void store_exclude_ip6(unsigned short *addr,int nbits)
{
-
- FILE *fp_ex;
- char buf[255];
- int i;
- int ip_size;
- unsigned int value4, value6;
- unsigned short int addr[8];
- int addr_len;
- int mask, max_mask;
-
- if(access(hexfile, R_OK) != 0) {
- debuga("Cannot open exclude_hosts file: %s - %s",hexfile,strerror(errno));
- exit(1);
- }
- if(debug)
- debuga("%s: %s",text[67],hexfile);
-
- if ((fp_ex = fopen(hexfile, "r")) == NULL) {
- fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno));
- exit(1);
- }
-
- while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
- if(strchr(buf,'#') != NULL)
- continue;
- fixendofline(buf);
- ip_size=0x60 | 0x04;
- value4=0U;
- value6=0U;
- addr_len=0;
- for (i=0 ; (unsigned char)buf[i]>' ' && buf[i]!='/' ; i++) {
- if (ip_size & 0x04) {
- if (isdigit(buf[i])) {
- value4=value4*10+(buf[i]-'0');
- if (value4>0xFFU) ip_size&=~0x04;
- } else if (buf[i]=='.' && addr_len<4) {
- addr[addr_len++]=(unsigned short)(value4 & 0xFFU);
- value4=0U;
- } else {
- ip_size&=~0x04;
- }
- }
- if (ip_size & 0x60) {
- if (isdigit(buf[i])) {
- value6=(value6<<4)+(buf[i]-'0');
- if (value6>0xFFFFU) ip_size&=~0x60;
- } else if (toupper(buf[i])>='A' && toupper(buf[i])<='F') {
- value6=(value6<<4)+(toupper(buf[i])-'A'+10);
- if (value6>0xFFFFU) ip_size&=~0x60;
- } else if (buf[i]==':' && addr_len<8) {
- addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
- value6=0U;
- } else {
- ip_size&=~0x60;
- }
- }
- }
- if (i==0) continue;
- if (ip_size & 0x04) {
- if (addr_len!=3)
- ip_size&=~0x04;
- else
- addr[addr_len++]=(unsigned short)(value4 & 0xFFU);
- }
- if (ip_size & 0x60) {
- if (addr_len>=8)
- ip_size&=~0x60;
- else
- addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
- }
- if (ip_size) {
- max_mask=(ip_size & 0x04) ? 4*8 : 8*16;
- if (buf[i]=='/') {
- mask=atoi(buf+i+1);
- if (mask<0 || mask>max_mask) mask=max_mask;
- } else
- mask=max_mask;
- if (ip_size & 0x04)
- store_exclude_ip4(addr,mask);
- else {
- fprintf(stderr,"SARG: IPv6 addresses are not supported (found in %s)\n",hexfile);
- exit(1);
- }
- } else {
- store_exclude_url(buf,i);
- }
- }
-
- fclose(fp_ex);
-
- return;
+    struct hostip6struct *entry;
+    int group;
+
+    /* The table is full: make room for five more entries. */
+    if (num_exclude_ip6>=ip6allocated) {
+        struct hostip6struct *grown;
+
+        ip6allocated+=5;
+        grown=realloc(exclude_ip6,ip6allocated*sizeof(*grown));
+        if (grown==NULL) {
+            debuga(__FILE__,__LINE__,_("Not enough memory to store the exlcluded IP addresses\n"));
+            exit(EXIT_FAILURE);
+        }
+        exclude_ip6=grown;
+    }
+
+    /* Fill the first free entry and account for it. */
+    entry=&exclude_ip6[num_exclude_ip6++];
+    for (group=0 ; group<8 ; group++)
+        entry->address[group]=addr[group];
+    entry->nbits=nbits;
}
-int vhexclude(const char *url)
+/*!
+ Store a host name to exclude from the report.
+
+ If the name contains a wildcard ('*'), only the text after the first dot
+ following the last wildcard is stored, and the entry records how many dots
+ were seen from that dot onward. Nothing is stored when a wildcard is not
+ followed by a dot or when nothing remains after that dot.
+
+ \param url The host name to exclude.
+ \param next The first character after the host name; the name is the
+ range from \a url up to, but not including, \a next.
+ */
+static void store_exclude_url(const char *url,const char *next)
{
- int i, j;
- int length;
- int ip_size;
- unsigned int value4, value6;
- unsigned long int addr4;
- unsigned short int addr6[8];
- int addr_len;
- int dotpos[10];
- int ndots;
-
- ip_size=0x60 | 0x04;
- addr4=0UL;
- value4=0U;
- value6=0U;
- addr_len=0;
- for (i=0 ; (unsigned char)url[i]>' ' && url[i]!='/' && url[i]!='?'&& ((ip_size & 0x60)!=0 || url[i]!=':') && ip_size ; i++) {
- if (ip_size & 0x04) {
- if (isdigit(url[i])) {
- value4=value4*10+(url[i]-'0');
- if (value4>0xFFU) ip_size&=~0x04;
- } else if (url[i]=='.' && addr_len<4) {
- addr_len++;
- addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU);
- value4=0U;
- } else {
- ip_size&=~0x04;
- }
- }
- if (ip_size & 0x60) {
- if (isdigit(url[i])) {
- value6=(value6<<4)+(url[i]-'0');
- if (value6>0xFFFFU) ip_size&=~0x60;
- } else if (toupper(url[i])>='A' && toupper(url[i])<='F') {
- value6=(value6<<4)+(toupper(url[i])-'A'+10);
- if (value6>0xFFFFU) ip_size&=~0x60;
- } else if (url[i]==':' && addr_len<8) {
- addr6[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
- value6=0U;
- } else {
- ip_size&=~0x60;
- }
- }
- }
- if ((ip_size & 0x04) && addr_len==3) {
- if (exclude_ip4 == NULL) return(1);
- addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU);
- for (i=0 ; i<num_exclude_ip4 ; i++)
- if (((exclude_ip4[i].address ^ addr4) & exclude_ip4[i].mask)==0) return(0);
- } else if ((ip_size & 0x60) && addr_len<8) {
- addr6[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
- } else {
- if (exclude_name == NULL) return(1);
- ndots=0;
- for (length=0 ; (unsigned char)url[length]>' ' && url[length]!=':' && url[length]!='/' && url[length]!='?' ; length++)
- if (url[length]=='.') {
- /*
- We store the position of each dots of the URL to match it against any
- wildcard in the excluded list. The size of dotpos is big enough for the most
- ambitious URL but we have a safety mechanism that shift the positions should there be too
- many dots in the URL.
- */
- if (ndots<sizeof(dotpos)/sizeof(dotpos[0]))
- dotpos[ndots++]=length+1;
- else {
- for (j=1 ; j<ndots ; j++) dotpos[j-1]=dotpos[j];
- dotpos[ndots-1]=length+1;
- }
- }
- if (length>0) {
- for (i=0 ; i<num_exclude_name ; i++) {
- if (exclude_name[i].ndots>0) {
- const char *wurl=url;
- int len=length;
- if (exclude_name[i].ndots<=ndots) {
- wurl+=dotpos[ndots-exclude_name[i].ndots];
- len-=dotpos[ndots-exclude_name[i].ndots];
- }
- if (strncmp(exclude_name[i].url,wurl,len)==0 && exclude_name[i].url[len]=='\0') return(0);
- } else {
- if (strncmp(exclude_name[i].url,url,length)==0 && exclude_name[i].url[length]=='\0') return(0);
- }
- }
- }
- }
-
- return(1);
+    struct hostnamestruct *item;
+    int length=next-url;
+    int start=0;
+    int ndots=-1;
+    int firstdot=0;
+    int pos;
+
+    /* Find the wildcard, if any, and count the dots from the one that follows it. */
+    for (pos=0 ; pos<length ; pos++) {
+        switch (url[pos]) {
+        case '*':
+            firstdot=1;
+            break;
+        case '.':
+            if (firstdot) {
+                firstdot=0;
+                ndots=1;
+                start=pos+1;
+            } else if (ndots>=0) {
+                ndots++;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* A wildcard without a following dot, or nothing left after it: ignore the entry. */
+    if (firstdot || start>=length) return;
+    url+=start;
+    length-=start;
+
+    /* The table is full: make room for five more entries. */
+    if (num_exclude_name>=nameallocated) {
+        struct hostnamestruct *grown;
+
+        nameallocated+=5;
+        grown=realloc(exclude_name,nameallocated*sizeof(*grown));
+        if (grown==NULL) {
+            debuga(__FILE__,__LINE__,_("Not enough memory to store the excluded URLs\n"));
+            exit(EXIT_FAILURE);
+        }
+        exclude_name=grown;
+    }
+
+    item=&exclude_name[num_exclude_name++];
+    item->url=malloc(length+1);
+    if (item->url==NULL) {
+        debuga(__FILE__,__LINE__,_("Not enough memory to store the excluded URLs\n"));
+        exit(EXIT_FAILURE);
+    }
+    safe_strcpy(item->url,url,length+1);
+    if (ndots>0)
+        item->ndots=ndots;
+    else
+        item->ndots=-1;
}
+/*!
+ Read the file listing the hosts to exclude from the report.
-void getuexclude(const char *uexfile, int debug)
+
+ Lines starting with '#' are skipped. Every other line is handed to
+ extract_address_mask() and stored as a host name, an IPv4 network or an
+ IPv6 network according to the type it reports. The program exits if the
+ file cannot be opened, if a line is rejected or if reading fails.
+
+ \param hexfile The name of the file.
+ \param debug \c True to print debug information.
+ */
+void gethexclude(const char *hexfile, int debug)
{
+    FILE *fp_ex;
+    char buf[255];
+    int type;
+    const char *name;
+    unsigned char ipv4[4];
+    unsigned short int ipv6[8];
+    int nbits;
+    const char *next;
+
+    if(access(hexfile, R_OK) != 0) {
+        debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),hexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    if(debug)
+        debuga(__FILE__,__LINE__,_("Loading exclude host file from \"%s\"\n"),hexfile);
+
+    if ((fp_ex = fopen(hexfile, "r")) == NULL) {
+        debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),hexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
+        if(buf[0]=='#')
+            continue;
+        fixendofline(buf);
+
+        /* As used here: 1 is a host name, 2 an IPv4 address, 3 an IPv6 address, negative an error. */
+        type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
+        if (type<0) {
+            debuga(__FILE__,__LINE__,_("While reading \"%s\"\n"),hexfile);
+            exit(EXIT_FAILURE);
+        }
+
+        if (type==1) {
+            store_exclude_url(name,next);
+        } else if (type==2) {
+            store_exclude_ip4(ipv4,nbits);
+        } else if (type==3) {
+            store_exclude_ip6(ipv6,nbits);
+        }
+    }
+
+    /*
+    fgets() returns NULL on a read error as well as at the end of the file.
+    Only ferror() tells them apart; without it a partially read list would
+    be accepted silently.
+    */
+    if (ferror(fp_ex)) {
+        debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),hexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if (fclose(fp_ex)==EOF) {
+        debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),hexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    return;
+}
- FILE *fp_ex;
- char buf[255];
- long int nreg=0;
-
- if(debug)
- debuga("%s: %s",text[67],uexfile);
-
- if ((fp_ex = fopen(uexfile, "r")) == NULL) {
- fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno));
- exit(1);
- }
-
- fseek(fp_ex, 0, SEEK_END);
- nreg = ftell(fp_ex);
- if (nreg<0) {
- printf("SARG: Cannot get the size of file %s",uexfile);
- exit(1);
- }
- nreg += 11;
- fseek(fp_ex, 0, SEEK_SET);
-
- if((excludeuser=(char *) malloc(nreg))==NULL){
- fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
- exit(1);
- }
-
- bzero(excludeuser,nreg);
+/*!
+ Check if the URL is excluded as per the host exclusion list.
- while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
- if(strchr(buf,'#') != NULL)
- continue;
- fixendofline(buf);
- strcat(excludeuser,buf);
- strcat(excludeuser," ");
- }
+
+ The host part of the URL is classified by extract_address_mask() and
+ compared with the matching table: host names (with or without wildcard),
+ IPv4 networks or IPv6 networks. Every entry of the table is tried before
+ the URL is declared kept.
+
+ \param url The URL to check.
- strcat(excludeuser,"*END* ");
+ \retval 1 Keep the URL.
+ \retval 0 Exclude the URL.
+ */
+int vhexclude(const char *url)
+{
+    int i, j;
+    int length;
+    int type;
+    const char *name;
+    unsigned char ipv4[4];
+    unsigned short int ipv6[8];
+    unsigned long int addr4;
+    int dotpos[50];
+    int ndots;
+
+    type=extract_address_mask(url,&name,ipv4,ipv6,NULL,NULL);
+    if (type==1) {
+        if (exclude_name == NULL) return(1);
+        ndots=0;
+        for (length=0 ; (unsigned char)name[length]>' ' && name[length]!=':' && name[length]!='/' && name[length]!='?' ; length++) {
+            if (name[length]!='.') continue;
+            /*
+            We store the position of each dot of the host name to match it against any
+            wildcard in the excluded list. The size of dotpos is big enough for the most
+            ambitious URL but we have a safety mechanism that shifts the positions should
+            there be too many dots in the URL.
+            */
+            if (ndots<(int)(sizeof(dotpos)/sizeof(dotpos[0])))
+                dotpos[ndots++]=length+1;
+            else {
+                for (j=1 ; j<ndots ; j++) dotpos[j-1]=dotpos[j];
+                dotpos[ndots-1]=length+1;
+            }
+        }
+        if (length>0) {
+            for (i=0 ; i<num_exclude_name ; i++) {
+                /*
+                The comparison always runs on the host name located by
+                extract_address_mask() because length and dotpos were measured on it.
+                */
+                const char *wurl=name;
+                int len=length;
+
+                /* Wildcard entry: only compare the last labels of the host name. */
+                if (exclude_name[i].ndots>0 && exclude_name[i].ndots<=ndots) {
+                    wurl+=dotpos[ndots-exclude_name[i].ndots];
+                    len-=dotpos[ndots-exclude_name[i].ndots];
+                }
+                if (strncmp(exclude_name[i].url,wurl,len)==0 && exclude_name[i].url[len]=='\0') return(0);
+            }
+        }
+    } else if (type==2) {
+        if (exclude_ip4 == NULL) return(1);
+        addr4=0UL;
+        for (i=0 ; i<4 ; i++) addr4=(addr4 << 8) | ipv4[i];
+        for (i=0 ; i<num_exclude_ip4 ; i++) {
+            if (((exclude_ip4[i].address ^ addr4) & exclude_ip4[i].mask)==0) return(0);
+        }
+    } else if (type==3) {
+        if (exclude_ip6 == NULL) return(1);
+        for (i=0 ; i<num_exclude_ip6 ; i++) {
+            int nbits=exclude_ip6[i].nbits;
+            int full;
+            int rest;
+            unsigned int mask;
+
+            /* An out of range prefix length is treated as an exact match. */
+            if (nbits<0 || nbits>128) nbits=128;
+            full=nbits/16;
+            rest=nbits%16;
+
+            /* All the complete 16-bit groups of the prefix must be identical. */
+            for (j=0 ; j<full && ipv6[j]==exclude_ip6[i].address[j] ; j++);
+            /* This entry does not match: try the next one instead of keeping the URL. */
+            if (j<full) continue;
+            /* The prefix ends on a group boundary (this covers the full 128 bits). */
+            if (rest==0) return(0);
+            /* Compare only the rest most significant bits of the partial group. */
+            mask=(0xFFFFU << (16-rest)) & 0xFFFFU;
+            if (((ipv6[full] ^ exclude_ip6[i].address[full]) & mask)==0) return(0);
+        }
+    }
+    return(1);
+}
- fclose(fp_ex);
- return;
+/*!
+ Read the file listing the users to exclude from the report.
+
+ Lines containing a '#' are skipped. The remaining lines are appended to
+ the excludeuser string, each followed by one space, and the list is closed
+ with the "*END* " marker. The buffer is sized from the file length and is
+ enlarged if that estimate is too small: a chunk read without an end of
+ line (line longer than the read buffer, unterminated last line) costs one
+ more byte than it took in the file. The program exits on any error.
+
+ \param uexfile The name of the file.
+ \param debug \c True to print debug information.
+ */
+void getuexclude(const char *uexfile, int debug)
+{
+    static const char endmarker[]="*END* ";
+    FILE *fp_ex;
+    char buf[255];
+    long int nreg=0;
+    size_t capacity;
+    size_t used=0;
+    size_t len;
+
+    if(debug)
+        debuga(__FILE__,__LINE__,_("Loading exclude file from \"%s\"\n"),uexfile);
+
+    if ((fp_ex = fopen(uexfile, "r")) == NULL) {
+        debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),uexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if (fseek(fp_ex, 0, SEEK_END)==-1) {
+        debuga(__FILE__,__LINE__,_("Failed to move till the end of file \"%s\": %s\n"),uexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+    nreg = ftell(fp_ex);
+    if (nreg<0) {
+        debuga(__FILE__,__LINE__,_("Cannot get the size of file \"%s\"\n"),uexfile);
+        exit(EXIT_FAILURE);
+    }
+    nreg += 11;
+    if (fseek(fp_ex, 0, SEEK_SET)==-1) {
+        debuga(__FILE__,__LINE__,_("Failed to rewind file \"%s\": %s\n"),uexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if((excludeuser=(char *) malloc(nreg))==NULL){
+        debuga(__FILE__,__LINE__,_("malloc error (%ld bytes required)\n"),nreg);
+        exit(EXIT_FAILURE);
+    }
+    capacity=(size_t)nreg;
+    memset(excludeuser,0,capacity);
+
+    while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
+        if(strchr(buf,'#') != NULL)
+            continue;
+        fixendofline(buf);
+        len=strlen(buf);
+
+        /* Keep room for this entry, its separator and the end marker with its terminating zero. */
+        if (used+len+1+sizeof(endmarker)>capacity) {
+            size_t newcap=capacity+len+1+sizeof(endmarker)+sizeof(buf);
+            char *temp=realloc(excludeuser,newcap);
+
+            if (temp==NULL) {
+                debuga(__FILE__,__LINE__,_("malloc error (%ld bytes required)\n"),(long int)newcap);
+                exit(EXIT_FAILURE);
+            }
+            memset(temp+capacity,0,newcap-capacity);
+            excludeuser=temp;
+            capacity=newcap;
+        }
+
+        /* Append at the known end instead of rescanning the whole string with strcat(). */
+        memcpy(excludeuser+used,buf,len);
+        used+=len;
+        excludeuser[used++]=' ';
+    }
+
+    /* fgets() also returns NULL on a read error: don't accept a truncated list. */
+    if (ferror(fp_ex)) {
+        debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),uexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    /* The copy includes the terminating zero of the marker. */
+    memcpy(excludeuser+used,endmarker,sizeof(endmarker));
+
+    if (fclose(fp_ex)==EOF) {
+        debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),uexfile,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    return;
}
+/*!
+ Check if the user is excluded as per the user exclusion list.
+
+ The user must match a whole entry of the list: the match has to start at
+ the beginning of the list or right after a separating space, and has to be
+ followed by a space. An empty user name is never excluded.
+
+ \param user The user name to check.
+
+ \retval 1 Keep the user.
+ \retval 0 Exclude the user.
+ */
int vuexclude(const char *user)
{
- const char *wuser;
- int len;
-
- if (excludeuser) {
- len=strlen(user);
- wuser=excludeuser;
- while ((wuser=strstr(wuser,user))!=NULL) {
- if (wuser[len]==' ') return(0);
- wuser+=len;
- }
- }
-
- return(1);
+    const char *wuser;
+    size_t len;
+
+    if (excludeuser==NULL) return(1);
+
+    /*
+    strstr() finds an empty string at every position and the search would
+    never advance: an empty name cannot be an entry of the list anyway.
+    */
+    len=strlen(user);
+    if (len==0) return(1);
+
+    wuser=excludeuser;
+    while ((wuser=strstr(wuser,user))!=NULL) {
+        /* Reject a match that is only the tail of a longer entry ("bob" in "jimbob "). */
+        if ((wuser==excludeuser || wuser[-1]==' ') && wuser[len]==' ') return(0);
+        /* Step by one character so that no overlapping candidate is skipped. */
+        wuser++;
+    }
+
+    return(1);
}
-int is_indexonly(void)
+/*!
+ Tell whether the text "indexonly" appears anywhere in the user exclusion list.
+
+ \return \c True if a user exclusion list is loaded and contains "indexonly".
+ */
+bool is_indexonly(void)
{
- if (excludeuser==NULL) return(0);
- return(strstr(excludeuser,"indexonly") != NULL);
+    return(excludeuser!=NULL && strstr(excludeuser,"indexonly")!=NULL);
}
+/*!
+ Free the memory allocated for every exclusion list.
+
+ The IPv4, IPv6 and host name tables and the user list are released. Their
+ pointers and counters are reset so that the lists are seen as empty and
+ can be loaded again afterward.
+ */
void free_exclude(void)
{
- int i;
-
- if (exclude_ip4) {
- free(exclude_ip4);
- exclude_ip4=NULL;
- }
-
- if (exclude_name) {
- for (i=0 ; i<num_exclude_name ; i++)
- if (exclude_name[i].url) free(exclude_name[i].url);
- free(exclude_name);
- exclude_name=NULL;
- }
-
- if(excludeuser) {
- free(excludeuser);
- excludeuser=NULL;
- }
+    int i;
+
+    /* free(NULL) is a no-op so the pointers need no guard. */
+    free(exclude_ip4);
+    exclude_ip4=NULL;
+    num_exclude_ip4=0;
+    ip4allocated=0;
+
+    /* The IPv6 table was never released. */
+    free(exclude_ip6);
+    exclude_ip6=NULL;
+    num_exclude_ip6=0;
+    ip6allocated=0;
+
+    if (exclude_name) {
+        for (i=0 ; i<num_exclude_name ; i++)
+            free(exclude_name[i].url);
+        free(exclude_name);
+        exclude_name=NULL;
+    }
+    /* Without the reset, the next store would index a table that no longer exists. */
+    num_exclude_name=0;
+    nameallocated=0;
+
+    free(excludeuser);
+    excludeuser=NULL;
}