SET(sarg_VERSION 2)
SET(sarg_REVISION 2)
SET(sarg_BUILD "7rc1")
-SET(sarg_BUILDDATE "Jan-14-2010")
+SET(sarg_BUILDDATE "Jan-18-2010")
INCLUDE(AddFileDependencies)
INCLUDE(CheckIncludeFile)
SARG ChangeLog
-Jan-06-2010 Version 2.2.7
+Jan-18-2010 Version 2.2.7
- Extra compile and run time protection (FORTIFY_SOURCE) fixed in configure.
- Use tabulations as columns separator in intermediary files to avoid problems when a field of the log contains a space.
- Input log file type detection partly rewritten to clearly distinguish which type is processed where.
- Only copy the files when creating the directory with the images to link the reports to.
- Directories deleted without using the rm system command.
- Index created using an internal sort algorithm instead of a system call.
+ - Fixed exclude_hosts to exclude subdomains and IPv4 subnets.
Jan-06-2010 Version 2.2.6.1
- Remove unnecessary dependency on off_t.
+/*!\fn static void store_exclude_ip4(unsigned short int *addr,int mask)
+Store an IPv4 address to exclude from the report. A mask may be provided
+to exclude a subnet.
-/*! \fn int vhexclude(const char *excludefile, const char *url)
+\param addr The four numbers of the IPv4 address. Only the low 8 bits of each number are used.
+\param mask The number of leading bits of the address that must match (32 to match a single host).
+*/
+
+
+
+
+/*! \fn static void store_exclude_url(char *url,int length)
+Store a URL to exclude from the report.
+
+If the URL contains a wildcard (*), only the part of the URL after
+the first dot encountered after the wildcard is stored. It serves
+to exclude a subdomain.
+
+\param url The URL to exclude.
+\param length The length of the URL in bytes.
+*/
+
+
+
+
+
+/*!\fn static void gethexclude(const char *hexfile, int debug)
+Get the list of the hosts to exclude from the report.
+
+You must call free_exclude() to free the memory allocated by this function.
+
+\param hexfile The name of the file to read. There is one host per line. Any line
+containing a # anywhere in the line is a comment.
+\param debug Set to \c true to print debug information.
+*/
+
+
+
+
+
+/*! \fn static void getuexclude(const char *uexfile, int debug)
+Get the list of the users to exclude from the report.
+
+You must call free_exclude() to free the memory allocated by this function.
+
+\param uexfile The name of the file to read. There is one user per line. Any line
+containing a # anywhere in the line is a comment.
+\param debug Set to \c true to print debug information.
+*/
+
+
+
+
+
+/*! \fn int vhexclude(const char *url)
Tell if the site accessed by the user is excluded from the report.
-\param excludefile The list of the excluded sites as loaded by gethexclude().
\param url The URL to check.
\retval 1 The site is not excluded.
-/*! \fn int vuexclude(const char *excludeuser, const char *user)
+/*! \fn int vuexclude(const char *user)
Tell if the user is excluded from the report.
-\param excludeuser The list of the excluded users as loaded by getuexclude().
\param user The user to check.
\retval 1 The user is not excluded.
\retval 0 The user is excluded.
*/
+
+
+
+
+
+/*! \fn int is_indexonly(void)
+Tell if the exclusion list of the users mentions the word "indexonly", meaning that
+the user only wants to produce the index.html.
+
+\return \c True if only the index is requested or \c false if normal processing is requested.
+*/
+
+
+
+
+
+/*! \fn void free_exclude(void)
+Free the memory allocated by gethexclude() and getuexclude().
+*/
but the names are not stored in a way that is usable by that function.
*/
-
-
-
-
-/*!\fn static void gethexclude(const char *hexfile, int debug)
-Get the list of the hosts to exclude from the report.
-
-\param hexfile The name of the file to read. There is one host per line. Any line
-containing a # anywhere in the line is a comment.
-\param debug Set to \c true to print debug informations.
-*/
-
-
-
-
-/*! \fn static void getuexclude(const char *uexfile, int debug)
-Get the list of the users to exclude from the report.
-
-\param uexfile The name of the file to read. There is one user per line. Any line
-containing a # anywhere in the line is a comment.
-\param debug Set to \c true to print debug informations.
-*/
\ No newline at end of file
#include "include/conf.h"
#include "include/defs.h"
-int vhexclude(const char *excludefile, const char *url)
+struct hostip4struct // One excluded IPv4 host or subnet.
{
+ //! The IPv4 address packed in the low 32 bits, the first number of the address in the most significant byte.
+ unsigned long int address;
+ //! The bit mask selecting the bits that must be equal to match the address of the URL.
+ unsigned long int mask;
+};
- char *whost;
- char *whost_next;
- char *whost_dom;
- char *str;
- char *wurl;
- char *wurl_dom;
- char *port;
+struct hostnamestruct // One excluded host name, possibly entered with a wildcard.
+{
+ //! The name to match. For a wildcard entry, only the part after the first dot following the wildcard is kept. Allocated with malloc().
+ char *url;
+ //! For a wildcard entry, the number of dots counted from (and including) the dot that follows the wildcard, or -1 if the address is complete (no wildcard)
+ int ndots;
+};
+
+static struct hostip4struct *exclude_ip4=NULL; //!< Array of the excluded IPv4 addresses, enlarged with realloc().
+static int num_exclude_ip4=0; //!< Number of entries in use in exclude_ip4.
+static struct hostnamestruct *exclude_name=NULL; //!< Array of the excluded host names, enlarged with realloc().
+static int num_exclude_name=0; //!< Number of entries in use in exclude_name.
+static int ip4allocated=0; //!< Number of entries allocated in exclude_ip4.
+static int nameallocated=0; //!< Number of entries allocated in exclude_name.
+
+static char *excludeuser=NULL; //!< Excluded users separated by spaces and ended by "*END* ". NULL until getuexclude() is called.
+
+/*
+Append one IPv4 address or subnet to the list of the excluded hosts.
+
+addr holds the four numbers of the address (only the low 8 bits of each one
+are used) and mask is the number of leading bits that must match.
+
+The mask is clamped to 0..32 before it is converted to a bit mask because
+shifting by a negative count or by the full width of the type (mask==0 when
+unsigned long is 32 bits wide) is undefined in C. The resulting bit mask never
+has a bit set above bit 31.
+
+The program exits if the list cannot be enlarged.
+*/
+static void store_exclude_ip4(unsigned short int *addr,int mask)
+{
+   int i;
+   unsigned long int address;
+   unsigned long int bits;
- if((str=strdup(excludefile))==NULL) {
- fprintf(stderr, "SARG: %s (%u):\n",text[59], (unsigned int)(strlen(excludefile)+1));
+   if (num_exclude_ip4>=ip4allocated) {
+      struct hostip4struct *temp;
+
+      // grow by a few entries at a time to limit the number of reallocations
+      ip4allocated+=5;
+      temp=realloc(exclude_ip4,ip4allocated*sizeof(*temp));
+      if (temp==NULL) {
+         fprintf(stderr,"SARG: Not enough memory to store the excluded IP addresses\n");
          exit(1);
+      }
+      exclude_ip4=temp;
    }
- if((wurl=strdup(url))==NULL) {
- fprintf(stderr, "SARG: %s (%u):\n",text[59], (unsigned int)(strlen(url)+1));
+   // pack the four numbers, the first one ending in the most significant byte
+   address=0UL;
+   for (i=0 ; i<4 ; i++)
+      address=(address<<8) | (unsigned char)(addr[i] & 0xFFU);
+
+   if (mask<=0)
+      bits=0UL; // no bit to compare: every address matches
+   else if (mask>=32)
+      bits=0xFFFFFFFFUL;
+   else
+      bits=(0xFFFFFFFFUL << (32-mask)) & 0xFFFFFFFFUL;
+
+   exclude_ip4[num_exclude_ip4].address=address;
+   exclude_ip4[num_exclude_ip4].mask=bits;
+   num_exclude_ip4++;
+}
+
+/*
+Append one host name to the list of the excluded names.
+
+Only the first length bytes of url are read; url doesn't have to be NUL
+terminated at that position.
+
+If the name contains a wildcard (*), everything up to and including the first
+dot found after the last wildcard is dropped and the entry records the number
+of dots counted from that dot onwards. An entry that is empty once the
+wildcard part is removed, or a wildcard not followed by a dot, is ignored.
+
+The program exits if memory cannot be allocated.
+*/
+static void store_exclude_url(char *url,int length)
+{
+   int start;
+   int i;
+   int ndots, firstdot;
+   struct hostnamestruct *item;
+
+   start=0;
+   ndots=-1;
+   firstdot=0;
+   for (i=0 ; i<length ; i++) {
+      if (url[i]=='*') {
+         // wait for the dot ending the wildcarded label
+         firstdot=1;
+      } else if (url[i]=='.') {
+         if (firstdot) {
+            firstdot=0;
+            ndots=1;
+            start=i+1;
+         } else if (ndots>=0) {
+            ndots++;
+         }
+      }
+   }
+   if (start>=length || firstdot) return;
+   if (start>0) {
+      url+=start;
+      length-=start;
+   }
+
+   if (num_exclude_name>=nameallocated) {
+      struct hostnamestruct *temp;
+
+      nameallocated+=5;
+      temp=realloc(exclude_name,nameallocated*sizeof(*temp));
+      if (temp==NULL) {
+         fprintf(stderr,"SARG: Not enough memory to store the excluded URL\n");
          exit(1);
+      }
+      exclude_name=temp;
+   }
+
+   item=exclude_name+num_exclude_name;
+   num_exclude_name++;
+   item->url=malloc(length+1);
+   if (!item->url) {
+      fprintf(stderr,"SARG: Not enough memory to store the excluded URL\n");
+      exit(1);
+   }
+   // strncpy doesn't terminate the copy when the source is longer: do it explicitly
+   strncpy(item->url,url,length);
+   item->url[length]='\0';
+   item->ndots=(ndots>0) ? ndots : -1;
+}
+
+
+/*
+Load the list of the hosts to exclude from the report.
+
+Every line of hexfile not containing a # is parsed up to the first blank,
+control character or slash. The text is tried as an IPv4 address and as an
+IPv6 address at the same time: ip_size keeps the bit 0x04 as long as the text
+may be an IPv4 address and the bits 0x60 as long as it may be an IPv6 address.
+An IPv4 address may be followed by /n to exclude a subnet. Anything else is
+stored as a host name.
+
+Characters are converted to unsigned char before they are given to isdigit()
+and toupper(), whose behavior is undefined for negative values.
+
+NOTE(review): a name made only of hexadecimal digits and without any dot or
+colon (for instance "cafe" or "123") keeps the IPv6 bits set and ends in the
+"IPv6 addresses are not supported" error. Confirm whether that is intended.
+
+The program exits if the file cannot be read or if an IPv6 address is found.
+*/
+void gethexclude(const char *hexfile, int debug)
+{
+
+   FILE *fp_ex;
+   char buf[255];
+   int i;
+   int ip_size;
+   unsigned int value4, value6;
+   unsigned short int addr[8];
+   int addr_len;
+   int mask, max_mask;
+
+   if(access(hexfile, R_OK) != 0) {
+      debuga("Cannot open exclude_hosts file: %s - %s",hexfile,strerror(errno));
+      exit(1);
    }
- whost=str;
- whost_next=strchr(whost,' ');
- if (whost_next) *whost_next++ = '\0';
+   if(debug)
+      debuga("%s: %s",text[67],hexfile);
- port=strchr(wurl,':');
- if(port != NULL) {
- // remove the port number at the end of the site's address
- *port='\0';
+   if ((fp_ex = fopen(hexfile, "r")) == NULL) {
+      fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno));
+      exit(1);
    }
- wurl_dom=strchr(wurl,'.');
- while(strcmp(whost,"*END*") != 0) {
- if(strcmp(wurl,whost) == 0) {
- free(wurl);
- free(str);
- return(0);
+   while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
+      if(strchr(buf,'#') != NULL)
+         continue;
+      fixendofline(buf);
+      ip_size=0x60 | 0x04;
+      value4=0U;
+      value6=0U;
+      addr_len=0;
+      for (i=0 ; (unsigned char)buf[i]>' ' && buf[i]!='/' ; i++) {
+         const int c=(unsigned char)buf[i];
+
+         if (ip_size & 0x04) {
+            if (isdigit(c)) {
+               value4=value4*10+(c-'0');
+               if (value4>0xFFU) ip_size&=~0x04;
+            } else if (c=='.' && addr_len<4) {
+               addr[addr_len++]=(unsigned short)(value4 & 0xFFU);
+               value4=0U;
+            } else {
+               ip_size&=~0x04;
+            }
+         }
+         if (ip_size & 0x60) {
+            if (isdigit(c)) {
+               value6=(value6<<4)+(c-'0');
+               if (value6>0xFFFFU) ip_size&=~0x60;
+            } else if (toupper(c)>='A' && toupper(c)<='F') {
+               value6=(value6<<4)+(toupper(c)-'A'+10);
+               if (value6>0xFFFFU) ip_size&=~0x60;
+            } else if (c==':' && addr_len<8) {
+               addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
+               value6=0U;
+            } else {
+               ip_size&=~0x60;
+            }
+         }
+      }
+      if (i==0) continue;
+      // the last number of the address is not followed by a separator
+      if (ip_size & 0x04) {
+         if (addr_len!=3)
+            ip_size&=~0x04;
+         else
+            addr[addr_len++]=(unsigned short)(value4 & 0xFFU);
+      }
+      if (ip_size & 0x60) {
+         if (addr_len>=8)
+            ip_size&=~0x60;
+         else
+            addr[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
       }
- if(wurl_dom != NULL && strchr(whost,'*') != 0) {
- whost_dom=strchr(whost,'.');
- if (whost_dom != NULL && strcmp(wurl_dom,whost_dom) == 0) {
- free(wurl);
- free(str);
- return(0);
+      if (ip_size) {
+         max_mask=(ip_size & 0x04) ? 4*8 : 8*16;
+         if (buf[i]=='/') {
+            mask=atoi(buf+i+1);
+            if (mask<0 || mask>max_mask) mask=max_mask;
+         } else
+            mask=max_mask;
+         if (ip_size & 0x04)
+            store_exclude_ip4(addr,mask);
+         else {
+            fprintf(stderr,"SARG: IPv6 addresses are not supported (found in %s)\n",hexfile);
+            exit(1);
         }
+      } else {
+         store_exclude_url(buf,i);
      }
- if (whost_next == NULL) break;
- whost=whost_next;
- whost_next=strchr(whost,' ');
- if (whost_next) *whost_next++ = '\0';
   }
- free(wurl);
- free(str);
+
+   fclose(fp_ex);
+
+   return;
+}
+
+/*
+Tell if the site accessed by the user is excluded from the report.
+
+The host part of url (up to the first blank, control character, slash,
+question mark or port separator) is tried as an IPv4 and as an IPv6 address at
+the same time: ip_size keeps the bit 0x04 as long as the text may be an IPv4
+address and the bits 0x60 as long as it may be an IPv6 address. An IPv4
+address is compared with the excluded addresses, anything that is not an IP
+address is compared with the excluded names. An IPv6 address is parsed but is
+never excluded as no IPv6 address can be stored in the list.
+
+Characters are converted to unsigned char before they are given to isdigit()
+and toupper(), whose behavior is undefined for negative values.
+
+NOTE(review): a host made only of hexadecimal digits and without any dot is
+taken for an IPv6 address and is therefore never compared with the excluded
+names. Confirm whether that is intended.
+
+Return 1 if the site is not excluded or 0 if it is excluded.
+*/
+int vhexclude(const char *url)
+{
+   int i, j;
+   int length;
+   int ip_size;
+   unsigned int value4, value6;
+   unsigned long int addr4;
+   unsigned short int addr6[8];
+   int addr_len;
+   int dotpos[10];
+   int ndots;
+
+   ip_size=0x60 | 0x04;
+   addr4=0UL;
+   value4=0U;
+   value6=0U;
+   addr_len=0;
+   for (i=0 ; (unsigned char)url[i]>' ' && url[i]!='/' && url[i]!='?'&& ((ip_size & 0x60)!=0 || url[i]!=':') && ip_size ; i++) {
+      const int c=(unsigned char)url[i];
+
+      if (ip_size & 0x04) {
+         if (isdigit(c)) {
+            value4=value4*10+(c-'0');
+            if (value4>0xFFU) ip_size&=~0x04;
+         } else if (c=='.' && addr_len<4) {
+            addr_len++;
+            addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU);
+            value4=0U;
+         } else {
+            ip_size&=~0x04;
+         }
+      }
+      if (ip_size & 0x60) {
+         if (isdigit(c)) {
+            value6=(value6<<4)+(c-'0');
+            if (value6>0xFFFFU) ip_size&=~0x60;
+         } else if (toupper(c)>='A' && toupper(c)<='F') {
+            value6=(value6<<4)+(toupper(c)-'A'+10);
+            if (value6>0xFFFFU) ip_size&=~0x60;
+         } else if (c==':' && addr_len<8) {
+            addr6[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
+            value6=0U;
+         } else {
+            ip_size&=~0x60;
+         }
+      }
+   }
+   if ((ip_size & 0x04) && addr_len==3) {
+      if (exclude_ip4 == NULL) return(1);
+      // the last number of the address is not followed by a dot
+      addr4=(addr4<<8) | (unsigned long int)(value4 & 0xFFU);
+      for (i=0 ; i<num_exclude_ip4 ; i++)
+         if (((exclude_ip4[i].address ^ addr4) & exclude_ip4[i].mask)==0) return(0);
+   } else if ((ip_size & 0x60) && addr_len<8) {
+      // the address is complete but there is nothing to compare it with
+      addr6[addr_len++]=(unsigned short)(value6 & 0xFFFFU);
+   } else {
+      if (exclude_name == NULL) return(1);
+      ndots=0;
+      for (length=0 ; (unsigned char)url[length]>' ' && url[length]!=':' && url[length]!='/' && url[length]!='?' ; length++)
+         if (url[length]=='.') {
+            /*
+            We store the position following each dot of the URL to match it against
+            any wildcard in the excluded list. The size of dotpos is big enough for
+            the most ambitious URL but we have a safety mechanism that shifts the
+            positions to keep the last ones should there be too many dots in the URL.
+            */
+            if (ndots<(int)(sizeof(dotpos)/sizeof(dotpos[0])))
+               dotpos[ndots++]=length+1;
+            else {
+               for (j=1 ; j<ndots ; j++) dotpos[j-1]=dotpos[j];
+               dotpos[ndots-1]=length+1;
+            }
+         }
+      if (length>0) {
+         for (i=0 ; i<num_exclude_name ; i++) {
+            if (exclude_name[i].ndots>0) {
+               // wildcard entry: compare the end of the host name only
+               const char *wurl=url;
+               int len=length;
+               if (exclude_name[i].ndots<=ndots) {
+                  wurl+=dotpos[ndots-exclude_name[i].ndots];
+                  len-=dotpos[ndots-exclude_name[i].ndots];
+               }
+               if (strncmp(exclude_name[i].url,wurl,len)==0 && exclude_name[i].url[len]=='\0') return(0);
+            } else {
+               if (strncmp(exclude_name[i].url,url,length)==0 && exclude_name[i].url[length]=='\0') return(0);
+            }
+         }
+      }
+   }
+
   return(1);
}
-int vuexclude(const char *excludeuser, const char *user)
+/*
+Load the list of the users to exclude from the report.
+
+Every line of uexfile not containing a # is appended to excludeuser followed
+by one space. The list is ended by "*END* ". A list loaded by a previous call
+is released first. The memory must be released with free_exclude().
+
+The buffer is first sized from the size of the file. That size is not always
+enough because a line longer than buf is read in several pieces and one space
+is added after each piece, so the room left is checked before every copy and
+the buffer is enlarged if necessary.
+
+NOTE(review): fixendofline() is presumed to strip the end of line characters;
+its definition is not visible here.
+
+The program exits if the file cannot be read or if memory cannot be allocated.
+*/
+void getuexclude(const char *uexfile, int debug)
+{
+
+   static const char endmark[]="*END* ";
+   FILE *fp_ex;
+   char buf[255];
+   long int nreg=0;
+   size_t used=0;
+   size_t len;
+
+   if(debug)
+      debuga("%s: %s",text[67],uexfile);
+
+   if ((fp_ex = fopen(uexfile, "r")) == NULL) {
+      fprintf(stderr, "SARG: (getuexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno));
+      exit(1);
+   }
+
+   if (fseek(fp_ex, 0, SEEK_END)==0)
+      nreg = ftell(fp_ex);
+   else
+      nreg = -1;
+   if (nreg<0 || fseek(fp_ex, 0, SEEK_SET)!=0) {
+      fprintf(stderr, "SARG: Cannot get the size of file %s\n",uexfile);
+      exit(1);
+   }
+   nreg += 11;
+
+   free(excludeuser);
+   if((excludeuser=malloc(nreg))==NULL){
+      fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
+      exit(1);
+   }
+
+   while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
+      if(strchr(buf,'#') != NULL)
+         continue;
+      fixendofline(buf);
+      len=strlen(buf);
+      // keep room for the separator and for the end mark with its terminating zero
+      if (used+len+1+sizeof(endmark) > (size_t)nreg) {
+         char *temp;
+
+         nreg=(long int)(used+len+1+sizeof(endmark)+sizeof(buf));
+         temp=realloc(excludeuser,nreg);
+         if (temp==NULL) {
+            fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
+            exit(1);
+         }
+         excludeuser=temp;
+      }
+      memcpy(excludeuser+used,buf,len);
+      used+=len;
+      excludeuser[used++]=' ';
+   }
+
+   // sizeof includes the terminating zero of the end mark
+   memcpy(excludeuser+used,endmark,sizeof(endmark));
+
+   fclose(fp_ex);
+
+   return;
+}
+
+/*
+Tell if the user is excluded from the report.
+
+The list loaded by getuexclude() is a string of names, each one followed by a
+space. The user is excluded only if the name matches a whole entry: the match
+must begin at the start of the list or just after a space and must be
+followed by a space. Without the check on the beginning, "bob" would be
+excluded by an entry "jimbob".
+
+An empty user name is never excluded. It must be rejected before the search
+because strstr() finds an empty string at every position and the search would
+never move forward.
+
+Return 1 if the user is not excluded or 0 if the user is excluded.
+*/
+int vuexclude(const char *user)
{
   const char *wuser;
   int len;
- len=strlen(user);
- wuser=excludeuser;
- while ((wuser=strstr(wuser,user))!=NULL) {
- if (wuser[len]==' ') return(0);
- wuser+=len;
+   if (excludeuser) {
+      len=strlen(user);
+      if (len==0) return(1);
+      wuser=excludeuser;
+      while ((wuser=strstr(wuser,user))!=NULL) {
+         if ((wuser==excludeuser || wuser[-1]==' ') && wuser[len]==' ') return(0);
+         // resume just after the start of the rejected match
+         wuser++;
+      }
   }
   return(1);
}
+
+/*
+Tell if the word "indexonly" appears anywhere in the list of the excluded
+users. Return 0 if no list was loaded or if the word is absent, 1 otherwise.
+*/
+int is_indexonly(void)
+{
+   int found=0;
+
+   if (excludeuser!=NULL && strstr(excludeuser,"indexonly")!=NULL)
+      found=1;
+   return(found);
+}
+
+/*
+Free the memory allocated by gethexclude() and getuexclude().
+
+The counters and the allocated sizes are reset along with the pointers.
+Otherwise, a list loaded after this call would be indexed past entries that
+no longer exist, starting from a NULL array.
+*/
+void free_exclude(void)
+{
+   int i;
+
+   // free(NULL) does nothing: no need to test the pointers
+   free(exclude_ip4);
+   exclude_ip4=NULL;
+   num_exclude_ip4=0;
+   ip4allocated=0;
+
+   if (exclude_name) {
+      for (i=0 ; i<num_exclude_name ; i++)
+         free(exclude_name[i].url);
+      free(exclude_name);
+      exclude_name=NULL;
+   }
+   num_exclude_name=0;
+   nameallocated=0;
+
+   free(excludeuser);
+   excludeuser=NULL;
+}
int geramail(const char *dirname, int debug, const char *outdir, int userip, const char *email, const char *TempDir);
// exclude.c
-int vhexclude(const char *excludefile, const char *url);
-int vuexclude(const char *excludeuser, const char *user);
+void gethexclude(const char *hexfile, int debug);
+void getuexclude(const char *uexfile, int debug);
+int vhexclude(const char *url);
+int vuexclude(const char *user);
+int is_indexonly(void);
+void free_exclude(void);
// getconf.c
void getconf(void);
-#define VERSION PACKAGE_VERSION" Jan-14-2010"
+#define VERSION PACKAGE_VERSION" Jan-18-2010"
#define PGM PACKAGE_NAME
#define URL "http://sarg.sourceforge.net"
date_index_to_file_index(direntp->d_name);
}
}
+ closedir(dirp);
if(strcmp(IndexTree,"date") == 0) {
make_date_index();
char *userfile;
-char *excludefile;
-char *excludeuser;
numlist weekdays = { { 0, 1, 2, 3, 4, 5, 6 }, 7 };
numlist hours = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 }, 24 };
static void getusers(const char *pwdfile, int debug);
-static void gethexclude(const char *hexfile, int debug);
-static void getuexclude(const char *uexfile, int debug);
int main(int argc,char *argv[])
{
if(hexclude[0] == '\0')
strcpy(hexclude,ExcludeHosts);
- if(strlen(hexclude) > 0) {
- if(access(hexclude, R_OK) != 0) {
- debuga("Cannot open exclude_hosts file: %s - %s",hexclude,strerror(errno));
- exit(1);
- }
+ if(hexclude[0] != '\0') {
gethexclude(hexclude,debug);
fhost++;
}
indexonly=0;
if(fuser) {
- if(strstr(excludeuser,"indexonly") != 0)
+ if(is_indexonly())
indexonly++;
}
if(strcmp(ExcludeUsers,"indexonly") == 0) indexonly++;
l=1;else l=0;
}
if(fhost) {
-// l=vhexclude(excludefile,ip);
- l=vhexclude(excludefile,url);
+// l=vhexclude(ip);
+ l=vhexclude(url);
if(!l) {
if (debugm) printf("Excluded site: %s\n",url);
totregsx++;
if(l) {
if(fuser) {
- l=vuexclude(excludeuser,user);
+ l=vuexclude(user);
if(!l) {
if (debugm) printf("Excluded user: %s\n",user);
totregsx++;
if (fp_Write_User)
fclose (fp_Write_User);
+ free_download();
+ free_excludecodes();
+ free_exclude();
+
if(debug) {
int totalcount=0;
fclose(fp_denied);
if(fp_authfail)
fclose(fp_authfail);
- free_excludecodes();
if(userfile)
free(userfile);
- if(excludefile)
- free(excludefile);
- if(excludeuser)
- free(excludeuser);
- free_download();
unlink(tmp4);
unlink(tmp6);
unlink(tmp3);
fclose(fp_denied);
if(fp_authfail)
fclose(fp_authfail);
- free_excludecodes();
if(userfile)
free(userfile);
- if(excludefile)
- free(excludefile);
- if(excludeuser)
- free(excludeuser);
- free_download();
exit(0);
}
unlinkdir(tmp,0);
}
- free_excludecodes();
if(userfile)
free(userfile);
- if(excludefile)
- free(excludefile);
- if(excludeuser)
- free(excludeuser);
- free_download();
if(debug)
debuga("%s",text[21]);
return;
}
-
-
-static void gethexclude(const char *hexfile, int debug)
-{
-
- FILE *fp_ex;
- char buf[255];
- long int nreg=0;
-
- if(debug)
- debuga("%s: %s",text[67],hexfile);
-
- if ((fp_ex = fopen(hexfile, "r")) == NULL) {
- fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],hexfile,strerror(errno));
- exit(1);
- }
-
- fseek(fp_ex, 0, SEEK_END);
- nreg = ftell(fp_ex);
- if (nreg<0) {
- printf("SARG: Cannot get the size of file %s",hexfile);
- exit(1);
- }
- nreg += 11;
- fseek(fp_ex, 0, SEEK_SET);
-
- if((excludefile=(char *) malloc(nreg))==NULL){
- fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
- exit(1);
- }
-
- bzero(excludefile,nreg);
-
- while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
- if(strchr(buf,'#') != NULL)
- continue;
- fixendofline(buf);
- strcat(excludefile,buf);
- strcat(excludefile," ");
- }
-
- strcat(excludefile,"*END* ");
-
- fclose(fp_ex);
-
- return;
-}
-
-
-static void getuexclude(const char *uexfile, int debug)
-{
-
- FILE *fp_ex;
- char buf[255];
- long int nreg=0;
-
- if(debug)
- debuga("%s: %s",text[67],uexfile);
-
- if ((fp_ex = fopen(uexfile, "r")) == NULL) {
- fprintf(stderr, "SARG: (gethexclude) %s: %s - %s\n",text[45],uexfile,strerror(errno));
- exit(1);
- }
-
- fseek(fp_ex, 0, SEEK_END);
- nreg = ftell(fp_ex);
- if (nreg<0) {
- printf("SARG: Cannot get the size of file %s",uexfile);
- exit(1);
- }
- nreg += 11;
- fseek(fp_ex, 0, SEEK_SET);
-
- if((excludeuser=(char *) malloc(nreg))==NULL){
- fprintf(stderr, "SARG: %s (%ld):\n",text[59],nreg);
- exit(1);
- }
-
- bzero(excludeuser,nreg);
-
- while(fgets(buf,sizeof(buf),fp_ex)!=NULL){
- if(strchr(buf,'#') != NULL)
- continue;
- fixendofline(buf);
- strcat(excludeuser,buf);
- strcat(excludeuser," ");
- }
-
- strcat(excludeuser,"*END* ");
-
- fclose(fp_ex);
-
- return;
-}
# TAG: exclude_hosts file
# Hosts, domains or subnets will be excluded from reports.
#
-# Eg.: 192.168.10.10 - exclude ip address only
-# 192.168.10.0 - exclude full C class
-# s1.acme.foo - exclude hostname only
-# acme.foo - exclude full domain name
+# Eg.: 192.168.10.10 - exclude ip address only
+# 192.168.10.0/24 - exclude full C class
+# s1.acme.foo - exclude hostname only
+# *.acme.foo - exclude full domain name
#
#exclude_hosts none