]>
git.ipfire.org Git - thirdparty/sarg.git/blob - url.c
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
33 #error "PCRE not compiled in"
37 A host name and the name to report.
41 //! The next host name in the list or NULL for the last item.
42 struct hostalias_name
*Next
;
43 //! The minimum length of a candidate host name.
45 //! The length of the constant part at the beginning of the mask.
47 //! The length of the constant part at the end of the mask.
49 //! The first part of the mask of the host name.
50 const char *HostName_Prefix
;
51 //! The second part of the mask of the host name.
52 const char *HostName_Suffix
;
53 //! The replacement name.
58 An IPv4 address and the name to report.
62 //! The next host name in the list or NULL for the last item.
63 struct hostalias_ipv4
*Next
;
66 //! The number of bits in the prefix.
68 //! The replacement name.
73 An IPv6 address and the name to report.
77 //! The next host name in the list or NULL for the last item.
78 struct hostalias_ipv6
*Next
;
81 //! The number of bits in the prefix.
83 //! The replacement name.
91 struct hostalias_regex
93 //! The next regular expression to test.
94 struct hostalias_regex
*Next
;
95 //! The regular expression to match against the host name.
97 //! The replacement name.
99 //! \c True if this regular expression contains at least one subpattern
104 //! The first host name.
105 static struct hostalias_name
*FirstAliasName
=NULL
;
106 //! The first IPv4 address.
107 static struct hostalias_ipv4
*FirstAliasIpv4
=NULL
;
108 //! The first IPv6 address.
109 static struct hostalias_ipv6
*FirstAliasIpv6
=NULL
;
112 static struct hostalias_regex
*FirstAliasRe
=NULL
;
116 Store a name to alias.
118 \param name The name to match including the wildcard.
119 \param next A pointer to the first character after the name.
121 \retval 1 Alias added.
122 \retval 0 Ignore the line.
125 static int Alias_StoreName(const char *name
,const char *next
)
127 const char *NameBegin
;
128 const char *NameBeginE
;
130 const char *NameEndE
;
132 const char *ReplaceE
;
135 struct hostalias_name
*alias
;
136 struct hostalias_name
*new_alias
;
137 struct hostalias_name
*prev_alias
;
141 if (*name
=='#' || *name
==';') return(0);
143 // get host name and split at the wildcard
145 for (str
=NameBegin
; str
<next
&& (unsigned char)*str
>' ' && *str
!='*' ; str
++);
147 if (NameBegin
==NameBeginE
) NameBegin
=NULL
;
148 if (str
<next
&& *str
=='*') {
150 while (str
<next
&& (unsigned char)*str
>' ') {
152 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin
,NameEnd
);
158 if (NameEnd
==NameEndE
) {
159 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next
-name
),name
);
165 while (str
<next
&& (unsigned char)*str
<=' ') str
++;
166 if (!NameBegin
&& !NameEnd
) return(0);
170 if (sep
==' ' || sep
=='\t') {
172 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
173 if ((unsigned char)*Replace
<' ') {
176 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
183 len
=(int)(NameBeginE
-NameBegin
);
186 debuga(_("Not enough memory to store the host name aliasing directives\n"));
189 memcpy(tmp
,NameBegin
,len
);
194 len
=(int)(NameEndE
-NameEnd
);
197 if (NameBegin
) free((void*)NameBegin
);
198 debuga(_("Not enough memory to store the host name aliasing directives\n"));
201 memcpy(tmp
,NameEnd
,len
);
208 for (alias
=FirstAliasName
; alias
; alias
=alias
->Next
) {
209 if (((NameBegin
&& alias
->HostName_Prefix
&& !strcmp(NameBegin
,alias
->HostName_Prefix
)) || (!NameBegin
&& !alias
->HostName_Prefix
)) &&
210 ((NameEnd
&& alias
->HostName_Suffix
&& !strcmp(NameEnd
,alias
->HostName_Suffix
)) || (!NameEnd
&& !alias
->HostName_Suffix
))) {
211 if (NameBegin
) free((void*)NameBegin
);
217 // insert into the list
218 new_alias
=malloc(sizeof(*new_alias
));
220 if (NameBegin
) free((void*)NameBegin
);
221 if (NameEnd
) free((void*)NameEnd
);
222 debuga(_("Not enough memory to store the host name aliasing directives\n"));
227 new_alias
->HostName_Prefix
=NameBegin
;
228 new_alias
->MinLen
+=strlen(NameBegin
);
229 new_alias
->PrefixLen
=strlen(NameBegin
);
231 new_alias
->HostName_Prefix
=NULL
;
232 new_alias
->PrefixLen
=0;
235 new_alias
->HostName_Suffix
=NameEnd
;
236 new_alias
->MinLen
+=strlen(NameEnd
)+1;
237 new_alias
->SuffixLen
=strlen(NameEnd
);
239 new_alias
->HostName_Suffix
=NULL
;
240 new_alias
->SuffixLen
=0;
243 len
=(int)(ReplaceE
-Replace
);
246 debuga(_("Not enough memory to store the host name aliasing directives\n"));
250 memcpy(tmp
+1,Replace
,len
);
252 new_alias
->Alias
=tmp
;
254 tmp
=malloc(new_alias
->MinLen
+2);
256 debuga(_("Not enough memory to store the host name aliasing directives\n"));
260 if (new_alias
->HostName_Prefix
) strcpy(tmp
+1,new_alias
->HostName_Prefix
);
261 if (new_alias
->HostName_Suffix
) {
262 tmp
[new_alias
->PrefixLen
+1]='*';
263 strcpy(tmp
+new_alias
->PrefixLen
+2,new_alias
->HostName_Suffix
);
265 new_alias
->Alias
=tmp
;
268 new_alias
->Next
=NULL
;
270 prev_alias
->Next
=new_alias
;
272 FirstAliasName
=new_alias
;
277 Store an IPv4 address to alias.
279 \param ipv4 The IPv4 to match.
280 \param nbits The number of bits in the prefix
281 \param next A pointer to the first character after the address.
283 \retval 1 Alias added.
284 \retval 0 Ignore the line.
287 static int Alias_StoreIpv4(unsigned char *ipv4
,int nbits
,const char *next
)
290 const char *ReplaceE
;
292 struct hostalias_ipv4
*alias
;
293 struct hostalias_ipv4
*new_alias
;
294 struct hostalias_ipv4
*prev_alias
;
301 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
302 if ((unsigned char)*Replace
<' ') {
305 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
309 // store more restrictive range first
311 for (alias
=FirstAliasIpv4
; alias
; alias
=alias
->Next
) {
312 i
=(nbits
<alias
->NBits
) ? nbits
: alias
->NBits
;
313 if ((i
<8 || memcmp(ipv4
,alias
->Ip
,i
/8)==0) && ((i
%8)==0 || (ipv4
[i
/8] ^ alias
->Ip
[i
/8]) & (0xFFU
<<(8-i
%8)))==0) {
319 // insert into the list
320 new_alias
=malloc(sizeof(*new_alias
));
322 debuga(_("Not enough memory to store the host name aliasing directives\n"));
325 memcpy(new_alias
->Ip
,ipv4
,4);
326 new_alias
->NBits
=nbits
;
328 len
=(int)(ReplaceE
-Replace
);
331 debuga(_("Not enough memory to store the host name aliasing directives\n"));
335 memcpy(tmp
+1,Replace
,len
);
337 new_alias
->Alias
=tmp
;
341 debuga(_("Not enough memory to store the host name aliasing directives\n"));
344 sprintf(tmp
,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX
,ipv4
[0],ipv4
[1],ipv4
[2],ipv4
[3],nbits
);
345 new_alias
->Alias
=tmp
;
349 new_alias
->Next
=prev_alias
->Next
;
350 prev_alias
->Next
=new_alias
;
352 new_alias
->Next
=NULL
;
353 FirstAliasIpv4
=new_alias
;
359 Store an IPv6 address to alias.
361 \param ipv6 The IPv6 to match.
362 \param nbits The number of bits in the prefix
363 \param next A pointer to the first character after the address.
365 \retval 1 Alias added.
366 \retval 0 Ignore the line.
369 static int Alias_StoreIpv6(unsigned short *ipv6
,int nbits
,const char *next
)
372 const char *ReplaceE
;
374 struct hostalias_ipv6
*alias
;
375 struct hostalias_ipv6
*new_alias
;
376 struct hostalias_ipv6
*prev_alias
;
383 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
384 if ((unsigned char)*Replace
<' ') {
387 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
391 // store more restrictive range first
393 for (alias
=FirstAliasIpv6
; alias
; alias
=alias
->Next
) {
394 i
=(nbits
<alias
->NBits
) ? nbits
: alias
->NBits
;
395 if ((i
<16 || memcmp(ipv6
,alias
->Ip
,i
/16*2)==0) && ((i
%16)==0 || (ipv6
[i
/16] ^ alias
->Ip
[i
/16]) & (0xFFFFU
<<(16-i
%16)))==0) {
401 // insert into the list
402 new_alias
=malloc(sizeof(*new_alias
));
404 debuga(_("Not enough memory to store the host name aliasing directives\n"));
407 memcpy(new_alias
->Ip
,ipv6
,8*sizeof(unsigned short int));
408 new_alias
->NBits
=nbits
;
410 len
=ReplaceE
-Replace
;
413 debuga(_("Not enough memory to store the host name aliasing directives\n"));
417 memcpy(tmp
+1,Replace
,len
);
419 new_alias
->Alias
=tmp
;
423 debuga(_("Not enough memory to store the host name aliasing directives\n"));
426 sprintf(tmp
,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX
,ipv6
[0],ipv6
[1],ipv6
[2],ipv6
[3],ipv6
[4],ipv6
[5],ipv6
[6],ipv6
[7],nbits
);
427 new_alias
->Alias
=tmp
;
431 new_alias
->Next
=prev_alias
->Next
;
432 prev_alias
->Next
=new_alias
;
434 new_alias
->Next
=NULL
;
435 FirstAliasIpv6
=new_alias
;
442 Store a regular expression to match the alias.
444 \retval 1 Alias added.
445 \retval 0 Ignore the line.
448 static int Alias_StoreRegexp(char *buf
)
452 struct hostalias_regex
*alias
;
453 struct hostalias_regex
*new_alias
;
454 struct hostalias_regex
**prev_alias
;
455 const char *PcreError
;
464 for (End
=buf
; *End
&& *End
!=Delimiter
; End
++) {
467 debuga(_("Invalid NUL character found in regular expression\n"));
470 End
++; //ignore the escaped character
473 if (*End
!=Delimiter
) {
474 debuga(_("Unterminated regular expression\n"));
480 for (Replace
=End
; *Replace
==' ' || *Replace
=='\t' ; Replace
++);
481 for (End
=Replace
; *End
&& (unsigned char)*End
>' ' ; End
++);
485 new_alias
=malloc(sizeof(*new_alias
));
487 debuga(_("Not enough memory to store the host name aliasing directives\n"));
490 new_alias
->Next
=NULL
;
491 new_alias
->Re
=pcre_compile(buf
,0,&PcreError
,&ErrorOffset
,NULL
);
492 if (new_alias
->Re
==NULL
) {
493 debuga(_("Failed to compile the regular expression \"%s\": %s\n"),buf
,PcreError
);
500 debuga(_("Not enough memory to store the host name aliasing directives\n"));
501 pcre_free(new_alias
->Re
);
505 memcpy(tmp
+1,Replace
,len
);
507 new_alias
->Alias
=tmp
;
509 new_alias
->SubPartern
=false;
510 for (i
=1 ; tmp
[i
] ; i
++)
511 // both the sed \1 and the perl $1 replacement operators are accepted
512 if ((tmp
[i
]=='\\' || tmp
[i
]=='$') && isdigit(tmp
[i
+1])) {
513 new_alias
->SubPartern
=true;
518 prev_alias
=&FirstAliasRe
;
519 for (alias
=FirstAliasRe
; alias
; alias
=alias
->Next
)
520 prev_alias
=&alias
->Next
;
521 *prev_alias
=new_alias
;
528 Store an alias in the corresponding list.
530 \param buf The string to parse and store.
533 \retval -1 Error in file.
535 static int Alias_Store(char *buf
)
539 unsigned char ipv4
[4];
540 unsigned short int ipv6
[8];
545 if (strncasecmp(buf
,"re:",3)==0) {
547 if (Alias_StoreRegexp(buf
+3)<0)
551 debuga(_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
555 type
=extract_address_mask(buf
,&name
,ipv4
,ipv6
,&nbits
,&next
);
561 Error
=Alias_StoreName(name
,next
);
562 } else if (type
==2) {
563 Error
=Alias_StoreIpv4(ipv4
,nbits
,next
);
564 } else if (type
==3) {
565 Error
=Alias_StoreIpv6(ipv6
,nbits
,next
);
567 if (Error
<0) return(-1);
572 Read the file containing the host names to alias in the report.
574 \param Filename The name of the file.
576 void read_hostalias(const char *Filename
)
582 if (debug
) debuga(_("Reading host alias file \"%s\"\n"),Filename
);
583 fi
=fopen(Filename
,"rt");
585 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename
,strerror(errno
));
589 if ((line
=longline_create())==NULL
) {
590 debuga(_("Not enough memory to read the host name aliases\n"));
594 while ((buf
=longline_read(fi
,line
)) != NULL
) {
595 if (Alias_Store(buf
)<0) {
596 debuga(_("While reading \"%s\"\n"),Filename
);
601 longline_destroy(&line
);
605 struct hostalias_name
*alias1
;
606 struct hostalias_ipv4
*alias4
;
607 struct hostalias_ipv6
*alias6
;
609 debuga(_("List of host names to alias:\n"));
610 for (alias1
=FirstAliasName
; alias1
; alias1
=alias1
->Next
) {
611 if (alias1
->HostName_Prefix
&& alias1
->HostName_Suffix
)
612 debuga(_(" %s*%s => %s\n"),alias1
->HostName_Prefix
,alias1
->HostName_Suffix
,alias1
->Alias
);
613 else if (alias1
->HostName_Prefix
)
614 debuga(_(" %s => %s\n"),alias1
->HostName_Prefix
,alias1
->Alias
);
616 debuga(_(" *%s => %s\n"),alias1
->HostName_Suffix
,alias1
->Alias
);
618 for (alias4
=FirstAliasIpv4
; alias4
; alias4
=alias4
->Next
) {
619 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4
->Ip
[0],alias4
->Ip
[1],alias4
->Ip
[2],alias4
->Ip
[3],alias4
->NBits
,alias4
->Alias
);
621 for (alias6
=FirstAliasIpv6
; alias6
; alias6
=alias6
->Next
) {
622 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6
->Ip
[0],alias6
->Ip
[1],alias6
->Ip
[2],alias6
->Ip
[3],
623 alias6
->Ip
[4],alias6
->Ip
[5],alias6
->Ip
[6],alias6
->Ip
[7],alias6
->NBits
,alias6
->Alias
);
629 Free the memory allocated by read_hostalias().
631 void free_hostalias(void)
634 struct hostalias_name
*alias1
;
635 struct hostalias_name
*next1
;
637 for (alias1
=FirstAliasName
; alias1
; alias1
=next1
) {
639 if (alias1
->HostName_Prefix
) free((void *)alias1
->HostName_Prefix
);
640 if (alias1
->HostName_Suffix
) free((void *)alias1
->HostName_Suffix
);
641 free((void *)alias1
->Alias
);
647 struct hostalias_ipv4
*alias4
;
648 struct hostalias_ipv4
*next4
;
650 for (alias4
=FirstAliasIpv4
; alias4
; alias4
=next4
) {
652 free((void *)alias4
->Alias
);
658 struct hostalias_ipv6
*alias6
;
659 struct hostalias_ipv6
*next6
;
661 for (alias6
=FirstAliasIpv6
; alias6
; alias6
=next6
) {
663 free((void *)alias6
->Alias
);
670 struct hostalias_regex
*alias
;
671 struct hostalias_regex
*next
;
673 for (alias
=FirstAliasRe
; alias
; alias
=next
) {
675 pcre_free(alias
->Re
);
676 free((void *)alias
->Alias
);
685 Replace the host name by its alias if it is in our list.
687 \param url The host name.
689 \return The pointer to the host name or its alias.
691 static const char *alias_url_name(const char *url
,const char *next
)
693 struct hostalias_name
*alias
;
697 for (alias
=FirstAliasName
; alias
; alias
=alias
->Next
) {
698 if (len
<alias
->MinLen
) continue;
699 if (alias
->HostName_Prefix
) {
700 if (alias
->HostName_Suffix
) {
701 if (strncasecmp(url
,alias
->HostName_Prefix
,alias
->PrefixLen
)==0 &&
702 strncasecmp(url
+(len
-alias
->SuffixLen
),alias
->HostName_Suffix
,len
)==0) {
703 return(alias
->Alias
);
706 if (len
==alias
->PrefixLen
&& strncasecmp(url
,alias
->HostName_Prefix
,len
)==0) {
707 return(alias
->Alias
);
710 } else if (strncasecmp(url
+(len
-alias
->SuffixLen
),alias
->HostName_Suffix
,len
)==0) {
711 return(alias
->Alias
);
718 Replace the IPv4 address by its alias if it is in our list.
720 \param url The host name.
721 \param ipv4 The address.
723 \return The pointer to the host name or its alias.
725 static const char *alias_url_ipv4(const char *url
,unsigned char *ipv4
)
727 struct hostalias_ipv4
*alias
;
730 for (alias
=FirstAliasIpv4
; alias
; alias
=alias
->Next
) {
732 if ((len
<8 || memcmp(ipv4
,alias
->Ip
,len
/8)==0) && ((len
%8)==0 || (ipv4
[len
/8] ^ alias
->Ip
[len
/8]) & (0xFFU
<<(8-len
%8)))==0) {
733 return(alias
->Alias
);
740 Replace the IPv6 address by its alias if it is in our list.
742 \param url The host name.
743 \param ipv6 The address.
745 \return The pointer to the host name or its alias.
747 static const char *alias_url_ipv6(const char *url
,unsigned short int *ipv6
)
749 struct hostalias_ipv6
*alias
;
753 for (alias
=FirstAliasIpv6
; alias
; alias
=alias
->Next
) {
755 for (i
=len
/16-1 ; i
>=0 && ipv6
[i
]==alias
->Ip
[i
] ; i
--);
758 if (i
>=8 || len
%16==0 || ((ipv6
[i
] ^ alias
->Ip
[i
]) & (0xFFFF<<(len
-i
*16)))==0) {
759 return(alias
->Alias
);
767 Replace the host name by its alias if it is in our list.
769 \param url_ptr A pointer to the host name to match. It is replaced
770 by a pointer to the alias if a match is found.
772 \return \c True if a match is found or \c false if it failed.
774 \warning The function is not thread safe as it may return a static
777 static bool alias_url_regex(const char **url_ptr
)
779 struct hostalias_regex
*alias
;
783 int ovector
[30];//size must be a multiple of 3
784 static char Replacement
[1024];
792 for (alias
=FirstAliasRe
; alias
; alias
=alias
->Next
) {
793 nmatches
=pcre_exec(alias
->Re
,NULL
,url
,url_len
,0,0,ovector
,sizeof(ovector
)/sizeof(ovector
[0]));
795 if (nmatches
==0) nmatches
=(int)(sizeof(ovector
)/sizeof(ovector
[0]))/3*2; //only 2/3 of the vector is used by pcre_exec
796 if (nmatches
==1 || !alias
->SubPartern
) { //no subpattern to replace
797 *url_ptr
=alias
->Alias
;
801 for (i
=0 ; str
[i
] ; i
++) {
802 // both the sed \1 and the perl $1 replacement operators are accepted
803 if ((str
[i
]=='\\' || str
[i
]=='$') && isdigit(str
[i
+1])) {
805 if (sub
>=1 && sub
<=nmatches
) {
807 * ovector[sub] is the start position of the match.
808 * ovector[sub+1] is the end position of the match.
811 if (repl_idx
+ovector
[sub
+1]-ovector
[sub
]>=sizeof(Replacement
)-1) break;
812 memcpy(Replacement
+repl_idx
,url
+ovector
[sub
],ovector
[sub
+1]-ovector
[sub
]);
813 repl_idx
+=ovector
[sub
+1]-ovector
[sub
];
817 if (repl_idx
>=sizeof(Replacement
)-1) break;
818 Replacement
[repl_idx
++]=str
[i
];
820 Replacement
[repl_idx
]='\0';
821 *url_ptr
=Replacement
;
831 Find the beginning of the URL beyond the scheme://
833 \param url The url possibly containing a scheme.
835 \return The beginning of the url beyond the scheme.
837 const char *skip_scheme(const char *url
)
842 Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
843 The underscore is not part of the standard but is found in the squid logs as cache_object://.
845 for (str
=url
; *str
&& (isalnum(*str
) || *str
=='+' || *str
=='-' || *str
=='.' || *str
=='_') ; str
++);
846 if (str
[0]==':' && str
[1]=='/' && str
[2]=='/') {
848 while (*url
=='/') url
++;
854 Get the part of the URL necessary to generate the report.
856 \param url The URL as extracted from the report.
857 \param full_url \c True to keep the whole URL. If \c false,
858 the URL is truncated to only keep the host name and port number.
860 const char *process_url(char *url
,bool full_url
)
865 unsigned char ipv4
[4];
866 unsigned short int ipv6
[8];
869 start
=skip_scheme(url
);
871 for (str
=(char *)start
; *str
&& *str
!='/' && *str
!='?' ; str
++);
875 if (alias_url_regex(&start
)) return(start
);
878 type
=extract_address_mask(start
,NULL
,ipv4
,ipv6
,NULL
,&next
);
881 start
=alias_url_name(start
,next
);
882 } else if (type
==2) {
884 start
=alias_url_ipv4(start
,ipv4
);
885 } else if (type
==3) {
887 start
=alias_url_ipv6(start
,ipv6
);
894 Extract the host name from the URL.
896 \param url The url whose host name must be extracted.
897 \param hostname The buffer to store the host name.
898 \param hostsize The size of the host name buffer.
900 \note The function is stupid at this time. It just searches for the first slash
901 in the URL and truncates the URL there. It doesn't take the protocol into account
902 nor the port number nor any user or password information.
904 void url_hostname(const char *url
,char *hostname
,int hostsize
)
909 for (i
=0 ; i
<hostsize
&& url
[i
] && url
[i
]!='/' ; i
++)