char start_hour[128];
char end_hour[128];
char *linebuf;
- char hostname[512];
- char *url;
+ const char *url;
+ char *full_url;
char *urly;
char user[MAX_USER_LEN];
char splitprefix[MAXLEN];
start_hour[0]='\0';
end_hour[0]='\0';
hm_str[0]='\0';
+ HostAliasFile[0]='\0';
denied_count=0;
download_count=0;
getuexclude(ExcludeUsers,debug);
fuser=true;
}
+ if (HostAliasFile[0] != '\0')
+ read_hostalias(HostAliasFile);
indexonly=false;
if(fuser) {
debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
- if (getword_ptr(linebuf,&url,&gwarea,' ')<0) {
+ if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0) {
debuga(_("Maybe you have a broken url in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
debuga(_("Maybe you have a broken request method in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
- if (getword_ptr(linebuf,&url,&gwarea,' ')<0){
+ if (getword_ptr(linebuf,&full_url,&gwarea,' ')<0){
debuga(_("Maybe you have a broken url in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
- if (getword_ptr(linebuf,&url,&gwarea,'\t')<0){
+ if (getword_ptr(linebuf,&full_url,&gwarea,'\t')<0){
debuga(_("Maybe you have a broken record or garbage in your %s file\n"),arq);
exit(EXIT_FAILURE);
}
}
strcpy(tam,str);
} else if (x==isa_cols[ISACOL_Uri]) {
- url=str;
+ full_url=str;
} else if (x==isa_cols[ISACOL_Status]) {
if (strlen(str)>=sizeof(code)) {
debuga(_("Maybe you have a broken access code in your %s file\n"),arq);
}
#endif
- urly=url;
+ urly=full_url;
if(ilf!=ILF_Sarg) {
/*
The full URL is not saved in sarg log. There is no point in testing the URL to detect
a downloaded file.
*/
- download_flag=is_download_suffix(url);
+ download_flag=is_download_suffix(full_url);
if (download_flag) {
- download_url=url;
+ download_url=full_url;
download_count++;
}
} else
download_flag=false;
- // remove any protocol:// at the beginning of the URL
- if ((str = strchr(url,'/')) != NULL && str[1] == '/') {
- int i;
-
- str+=2;
- for (i=0 ; str[i] ; i++)
- url[i]=str[i];
- url[i]='\0';
- }
-
- if(!LongUrl) {
- url_hostname(url,hostname,sizeof(hostname));
- url=hostname;
- }
-
- if(url[0] == '\0') continue;
+ url=process_url(full_url);
+ if (!url || url[0] == '\0') continue;
if(addr[0] != '\0'){
if(strcmp(addr,ip)!=0) continue;
free_download();
free_excludecodes();
free_exclude();
+ free_hostalias();
if(debug) {
int totalcount=0;
--- /dev/null
+/*
+ * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
+ * 1998, 2011
+ *
+ * SARG donations:
+ * please look at http://sarg.sourceforge.net/donations.php
+ * Support:
+ * http://sourceforge.net/projects/sarg/forums/forum/363374
+ * ---------------------------------------------------------------------
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ *
+ */
+
+#include "include/conf.h"
+#include "include/defs.h"
+
+/*!
+A host name mask and the name to report in place of the host names matching it.
+
+The mask is split at its optional wildcard (*) into a constant prefix and a
+constant suffix. The strings are allocated by read_hostalias() and released by
+free_hostalias().
+*/
+struct hostalias
+{
+ //! The next host name in the list or NULL for the last item.
+ struct hostalias *Next;
+ //! The minimum length of a candidate host name: the prefix length plus, when the mask has a suffix, the suffix length plus one.
+ int MinLen;
+ //! The length of the constant part at the beginning of the mask (0 if there is none).
+ int PrefixLen;
+ //! The length of the constant part at the end of the mask (0 if there is none).
+ int SuffixLen;
+ //! The first part of the mask of the host name or NULL if the mask begins with the wildcard.
+ const char *HostName_Prefix;
+ //! The second part of the mask of the host name or NULL if the mask contains no wildcard.
+ const char *HostName_Suffix;
+ //! The replacement name. It is the mask itself if the file provides no replacement.
+ const char *Alias;
+};
+
+//! The first host name alias of the list filled by read_hostalias(). It is NULL until an alias is stored.
+struct hostalias *FirstAlias=NULL;
+
+/*!
+Read the file containing the host names to alias in the report.
+
+Each useful line contains a host name mask optionally followed by the name to
+report in its place. The mask may contain one wildcard (*) but it must not be
+its last character. Lines that are empty or begin with # or ; are ignored. A
+mask already present in the list is ignored. If no replacement name is given,
+the mask itself is used as the reported name.
+
+The new aliases are appended to the list starting at ::FirstAlias.
+
+The program exits if the file cannot be opened, if a mask is malformed or if
+the memory is exhausted.
+
+\param Filename The name of the file.
+*/
+void read_hostalias(const char *Filename)
+{
+    FILE *fi;
+    longline line;
+    char *buf;
+    char *str;
+    char *NameBegin;
+    char *NameEnd;
+    char *Replace;
+    struct hostalias *alias;
+    struct hostalias *prev_alias;
+    struct hostalias *new_alias;
+    int cmp;
+
+    if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
+    fi=fopen(Filename,"rt");
+    if (!fi) {
+        debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    if ((line=longline_create())==NULL) {
+        debuga(_("Not enough memory to read the host name aliases\n"));
+        exit(EXIT_FAILURE);
+    }
+
+    // append after any alias already in the list instead of overwriting (and leaking) it
+    prev_alias=FirstAlias;
+    while (prev_alias && prev_alias->Next) prev_alias=prev_alias->Next;
+
+    while ((buf=longline_read(fi,line)) != NULL) {
+        // get host name and split at the first wildcards
+        NameBegin=buf;
+        while (*NameBegin==' ' || *NameBegin=='\t') NameBegin++;
+        // skip empty lines and comments
+        if ((unsigned char)*NameBegin<' ' || *NameBegin=='#' || *NameBegin==';') continue;
+        // tolower() must receive a value representable as unsigned char
+        for (str=NameBegin ; *str && (unsigned char)*str>' ' && *str!='*' ; str++)
+            *str=tolower((unsigned char)*str);
+        if (*str=='*') {
+            *str++='\0';
+            NameEnd=str;
+            while (*str && (unsigned char)*str>' ') {
+                if (*str=='*') {
+                    debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*) in \"%s\""),NameBegin,NameEnd,Filename);
+                    exit(EXIT_FAILURE);
+                }
+                *str=tolower((unsigned char)*str);
+                str++;
+            }
+        } else
+            NameEnd=NULL;
+        // terminate the mask and skip the separator
+        while (*str && (unsigned char)*str<=' ') *str++='\0';
+        if (NameEnd && NameEnd[0]=='\0') {
+            // NameEnd is empty here, the part of the mask worth reporting is the prefix
+            debuga(_("Host name alias \"%s\" must not end with a wildcard"),NameBegin);
+            exit(EXIT_FAILURE);
+        }
+        if (NameBegin[0]=='\0') NameBegin=NULL;
+        if (!NameBegin && !NameEnd) continue;
+
+        // get the alias
+        Replace=str;
+        while (*Replace==' ' || *Replace=='\t') Replace++;
+        if ((unsigned char)*Replace<' ') {
+            Replace=NULL;
+        } else {
+            for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
+            *str='\0';
+        }
+
+        // ignore duplicates
+        cmp=1;
+        for (alias=FirstAlias ; alias ; alias=alias->Next) {
+            if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
+                ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
+                cmp=0;
+                break;
+            }
+        }
+        if (!cmp) continue;
+
+        // insert into the list
+        new_alias=malloc(sizeof(*new_alias));
+        if (!new_alias) {
+            debuga(_("Not enough memory to store the host name aliasing directives read in \"%s\""),Filename);
+            exit(EXIT_FAILURE);
+        }
+        new_alias->MinLen=0;
+        if (NameBegin) {
+            new_alias->HostName_Prefix=strdup(NameBegin);
+            if (!new_alias->HostName_Prefix) {
+                debuga(_("Not enough memory to store the host name aliasing directives read in \"%s\""),Filename);
+                exit(EXIT_FAILURE);
+            }
+            new_alias->MinLen+=strlen(NameBegin);
+            new_alias->PrefixLen=strlen(NameBegin);
+        } else {
+            new_alias->HostName_Prefix=NULL;
+            new_alias->PrefixLen=0;
+        }
+        if (NameEnd) {
+            new_alias->HostName_Suffix=strdup(NameEnd);
+            if (!new_alias->HostName_Suffix) {
+                debuga(_("Not enough memory to store the host name aliasing directives read in \"%s\""),Filename);
+                exit(EXIT_FAILURE);
+            }
+            // the extra character accounts for the wildcard
+            new_alias->MinLen+=strlen(NameEnd)+1;
+            new_alias->SuffixLen=strlen(NameEnd);
+        } else {
+            new_alias->HostName_Suffix=NULL;
+            new_alias->SuffixLen=0;
+        }
+        if (Replace) {
+            new_alias->Alias=strdup(Replace);
+            if (!new_alias->Alias) {
+                debuga(_("Not enough memory to store the host name aliasing directives read in \"%s\""),Filename);
+                exit(EXIT_FAILURE);
+            }
+        } else {
+            // no replacement name: report the mask itself
+            char *tmp;
+            // MinLen counts the prefix, the wildcard and the suffix but not the
+            // terminating NUL, hence the extra byte
+            tmp=malloc(new_alias->MinLen+1);
+            if (!tmp) {
+                debuga(_("Not enough memory to store the host name aliasing directives read in \"%s\""),Filename);
+                exit(EXIT_FAILURE);
+            }
+            if (new_alias->HostName_Prefix) strcpy(tmp,new_alias->HostName_Prefix);
+            if (new_alias->HostName_Suffix) {
+                tmp[new_alias->PrefixLen]='*';
+                strcpy(tmp+new_alias->PrefixLen+1,new_alias->HostName_Suffix);
+            }
+            new_alias->Alias=tmp;
+        }
+
+        new_alias->Next=NULL;
+        if (prev_alias)
+            prev_alias->Next=new_alias;
+        else
+            FirstAlias=new_alias;
+        prev_alias=new_alias;
+    }
+
+    longline_destroy(&line);
+    fclose(fi);
+
+    if (debug) {
+        debuga(_("List of host names to alias:\n"));
+        for (alias=FirstAlias ; alias ; alias=alias->Next) {
+            if (alias->HostName_Prefix && alias->HostName_Suffix)
+                debuga(_("  %s*%s => %s\n"),alias->HostName_Prefix,alias->HostName_Suffix,alias->Alias);
+            else if (alias->HostName_Prefix)
+                debuga(_("  %s => %s\n"),alias->HostName_Prefix,alias->Alias);
+            else
+                debuga(_("  *%s => %s\n"),alias->HostName_Suffix,alias->Alias);
+        }
+    }
+}
+
+/*!
+Free the memory allocated by read_hostalias().
+
+The list is left empty, so calling this function again or looking up an alias
+afterward never touches the freed items.
+*/
+void free_hostalias(void)
+{
+    struct hostalias *alias;
+    struct hostalias *next;
+
+    for (alias=FirstAlias ; alias ; alias=next) {
+        // fetch the link before the item is released
+        next=alias->Next;
+        // free(NULL) is a no-op, so the optional prefix and suffix need no test
+        free((void *)alias->HostName_Prefix);
+        free((void *)alias->HostName_Suffix);
+        free((void *)alias->Alias);
+        free(alias);
+    }
+    // don't leave a dangling pointer to the freed list
+    FirstAlias=NULL;
+}
+
+/*!
+Replace the host name by its alias if it matches one of the masks in our list.
+
+The masks are tested in the order of the list and the first match wins. The
+comparison is not case sensitive.
+
+\param url The host name to look for.
+
+\return The alias of the first matching mask, or \a url itself if no mask
+matches.
+*/
+const char *alias_url(const char *url)
+{
+    const struct hostalias *entry;
+    const int url_len=strlen(url);
+
+    entry=FirstAlias;
+    while (entry) {
+        // a host name shorter than the constant parts of the mask cannot match
+        if (url_len>=entry->MinLen) {
+            int matched;
+
+            if (!entry->HostName_Prefix) {
+                // mask of the form *suffix
+                matched=(strcasecmp(url+(url_len-entry->SuffixLen),entry->HostName_Suffix)==0);
+            } else if (!entry->HostName_Suffix) {
+                // mask without wildcard: the whole host name must be identical
+                matched=(url_len==entry->PrefixLen && strcasecmp(url,entry->HostName_Prefix)==0);
+            } else {
+                // mask of the form prefix*suffix
+                matched=(strncasecmp(url,entry->HostName_Prefix,entry->PrefixLen)==0 &&
+                    strcasecmp(url+(url_len-entry->SuffixLen),entry->HostName_Suffix)==0);
+            }
+            if (matched) return(entry->Alias);
+        }
+        entry=entry->Next;
+    }
+    return(url);
+}
+
+/*!
+Get the part of the URL necessary to generate the report.
+
+Any scheme followed by ":/" and further slashes is skipped at the beginning of
+the URL. If ::LongUrl is not set, only the host name is kept and it is replaced
+by its alias if host name aliases have been loaded.
+
+\param url The URL as extracted from the report. If ::LongUrl is not set, the
+buffer is modified: the first slash after the host name is overwritten with a
+NUL character.
+
+\return A pointer to the text to report. It points inside \a url or to the
+alias stored in the list of aliases. It is NULL if \a url is NULL.
+*/
+const char *process_url(char *url)
+{
+    char *str;
+    const char *start;
+
+    if (!url) return(NULL);
+
+    // remove any scheme:// at the beginning of the URL (see rfc2396 section 3.1)
+    // isalnum() must receive a value representable as unsigned char
+    for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.') ; str++);
+    if (*str==':' && str[1]=='/') {
+        url=str+1;
+        while (*url=='/') url++;
+    }
+
+    start=url;
+    if (!LongUrl) {
+        // cut the URL after the host name
+        for (str=url ; *str && *str!='/' ; str++);
+        if (*str=='/') *str='\0';
+        if (FirstAlias)
+            start=alias_url(start);
+    }
+    return(start);
+}
+
+/*!
+Extract the host name from the URL.
+
+\param url The url whose host name must be extracted.
+\param hostname The buffer to store the host name. It is always NUL terminated
+unless \a hostsize is not positive, in which case it is left untouched.
+\param hostsize The size of the host name buffer. The host name is truncated
+to fit in the buffer.
+
+\note The function is simplistic at this time. It just searches for the first slash
+in the URL and truncates the URL there. It doesn't take the protocol into account
+nor the port number nor any user or password information.
+*/
+void url_hostname(const char *url,char *hostname,int hostsize)
+{
+    int i;
+
+    // without room for the terminating NUL, nothing can be stored safely
+    if (hostsize<=0) return;
+
+    // keep one byte for the terminating NUL
+    hostsize--;
+    for (i=0 ; i<hostsize && url[i] && url[i]!='/' ; i++)
+        hostname[i]=url[i];
+    hostname[i]='\0';
+}
+