]>
git.ipfire.org Git - thirdparty/sarg.git/blob - url.c
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
6 * please look at http://sarg.sourceforge.net/donations.php
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
27 #include "include/conf.h"
28 #include "include/defs.h"
35 A host name and the name to report.
39 //! The next host name in the list or NULL for the last item.
40 struct hostalias_name
*Next
;
41 //! The minimum length of a candidate host name.
43 //! The length of the constant part at the beginning of the mask.
45 //! The length of the constant part at the end of the mask.
47 //! The first part of the mask of the host name.
48 const char *HostName_Prefix
;
49 //! The second part of the mask of the host name.
50 const char *HostName_Suffix
;
51 //! The replacement name.
56 An IPv4 address and the name to report.
60 //! The next IPv4 address in the list or NULL for the last item.
61 struct hostalias_ipv4
*Next
;
64 //! The number of bits in the prefix.
66 //! The replacement name.
71 An IPv6 address and the name to report.
75 //! The next IPv6 address in the list or NULL for the last item.
76 struct hostalias_ipv6
*Next
;
79 //! The number of bits in the prefix.
81 //! The replacement name.
89 struct hostalias_regex
91 //! The next regular expression to test.
92 struct hostalias_regex
*Next
;
93 //! The regular expression to match against the host name.
95 //! The replacement name.
97 //! \c True if this regular expression contains at least one subpattern
102 //! The first host name.
103 static struct hostalias_name
*FirstAliasName
=NULL
;
104 //! The first IPv4 address.
105 static struct hostalias_ipv4
*FirstAliasIpv4
=NULL
;
106 //! The first IPv6 address.
107 static struct hostalias_ipv6
*FirstAliasIpv6
=NULL
;
110 static struct hostalias_regex
*FirstAliasRe
=NULL
;
114 Store a name to alias.
116 \param name The name to match including the wildcard.
117 \param next A pointer to the first character after the name.
119 \retval 1 Alias added.
120 \retval 0 Ignore the line.
123 static int Alias_StoreName(const char *name
,const char *next
)
125 const char *NameBegin
;
126 const char *NameBeginE
;
128 const char *NameEndE
;
130 const char *ReplaceE
;
133 struct hostalias_name
*alias
;
134 struct hostalias_name
*new_alias
;
135 struct hostalias_name
*prev_alias
;
139 if (*name
=='#' || *name
==';') return(0);
141 // get host name and split at the wildcard
143 for (str
=NameBegin
; str
<next
&& (unsigned char)*str
>' ' && *str
!='*' ; str
++);
145 if (NameBegin
==NameBeginE
) NameBegin
=NULL
;
146 if (str
<next
&& *str
=='*') {
148 while (str
<next
&& (unsigned char)*str
>' ') {
150 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin
,NameEnd
);
156 if (NameEnd
==NameEndE
) {
157 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next
-name
),name
);
163 while (str
<next
&& (unsigned char)*str
<=' ') str
++;
164 if (!NameBegin
&& !NameEnd
) return(0);
168 if (sep
==' ' || sep
=='\t') {
170 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
171 if ((unsigned char)*Replace
<' ') {
174 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
181 len
=(int)(NameBeginE
-NameBegin
);
184 debuga(_("Not enough memory to store the host name aliasing directives\n"));
187 memcpy(tmp
,NameBegin
,len
);
192 len
=(int)(NameEndE
-NameEnd
);
195 if (NameBegin
) free((void*)NameBegin
);
196 debuga(_("Not enough memory to store the host name aliasing directives\n"));
199 memcpy(tmp
,NameEnd
,len
);
206 for (alias
=FirstAliasName
; alias
; alias
=alias
->Next
) {
207 if (((NameBegin
&& alias
->HostName_Prefix
&& !strcmp(NameBegin
,alias
->HostName_Prefix
)) || (!NameBegin
&& !alias
->HostName_Prefix
)) &&
208 ((NameEnd
&& alias
->HostName_Suffix
&& !strcmp(NameEnd
,alias
->HostName_Suffix
)) || (!NameEnd
&& !alias
->HostName_Suffix
))) {
209 if (NameBegin
) free((void*)NameBegin
);
215 // insert into the list
216 new_alias
=malloc(sizeof(*new_alias
));
218 if (NameBegin
) free((void*)NameBegin
);
219 if (NameEnd
) free((void*)NameEnd
);
220 debuga(_("Not enough memory to store the host name aliasing directives\n"));
225 new_alias
->HostName_Prefix
=NameBegin
;
226 new_alias
->MinLen
+=strlen(NameBegin
);
227 new_alias
->PrefixLen
=strlen(NameBegin
);
229 new_alias
->HostName_Prefix
=NULL
;
230 new_alias
->PrefixLen
=0;
233 new_alias
->HostName_Suffix
=NameEnd
;
234 new_alias
->MinLen
+=strlen(NameEnd
)+1;
235 new_alias
->SuffixLen
=strlen(NameEnd
);
237 new_alias
->HostName_Suffix
=NULL
;
238 new_alias
->SuffixLen
=0;
241 len
=(int)(ReplaceE
-Replace
);
244 debuga(_("Not enough memory to store the host name aliasing directives\n"));
248 memcpy(tmp
+1,Replace
,len
);
250 new_alias
->Alias
=tmp
;
252 tmp
=malloc(new_alias
->MinLen
+2);
254 debuga(_("Not enough memory to store the host name aliasing directives\n"));
258 if (new_alias
->HostName_Prefix
) strcpy(tmp
+1,new_alias
->HostName_Prefix
);
259 if (new_alias
->HostName_Suffix
) {
260 tmp
[new_alias
->PrefixLen
+1]='*';
261 strcpy(tmp
+new_alias
->PrefixLen
+2,new_alias
->HostName_Suffix
);
263 new_alias
->Alias
=tmp
;
266 new_alias
->Next
=NULL
;
268 prev_alias
->Next
=new_alias
;
270 FirstAliasName
=new_alias
;
275 Store an IPv4 address to alias.
277 \param ipv4 The IPv4 to match.
278 \param nbits The number of bits in the prefix
279 \param next A pointer to the first character after the address.
281 \retval 1 Alias added.
282 \retval 0 Ignore the line.
285 static int Alias_StoreIpv4(unsigned char *ipv4
,int nbits
,const char *next
)
288 const char *ReplaceE
;
290 struct hostalias_ipv4
*alias
;
291 struct hostalias_ipv4
*new_alias
;
292 struct hostalias_ipv4
*prev_alias
;
299 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
300 if ((unsigned char)*Replace
<' ') {
303 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
307 // store more restrictive range first
309 for (alias
=FirstAliasIpv4
; alias
; alias
=alias
->Next
) {
310 i
=(nbits
<alias
->NBits
) ? nbits
: alias
->NBits
;
311 if ((i
<8 || memcmp(ipv4
,alias
->Ip
,i
/8)==0) && ((i
%8)==0 || (ipv4
[i
/8] ^ alias
->Ip
[i
/8]) & (0xFFU
<<(8-i
%8)))==0) {
317 // insert into the list
318 new_alias
=malloc(sizeof(*new_alias
));
320 debuga(_("Not enough memory to store the host name aliasing directives\n"));
323 memcpy(new_alias
->Ip
,ipv4
,4);
324 new_alias
->NBits
=nbits
;
326 len
=(int)(ReplaceE
-Replace
);
329 debuga(_("Not enough memory to store the host name aliasing directives\n"));
333 memcpy(tmp
+1,Replace
,len
);
335 new_alias
->Alias
=tmp
;
339 debuga(_("Not enough memory to store the host name aliasing directives\n"));
342 sprintf(tmp
,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX
,ipv4
[0],ipv4
[1],ipv4
[2],ipv4
[3],nbits
);
343 new_alias
->Alias
=tmp
;
347 new_alias
->Next
=prev_alias
->Next
;
348 prev_alias
->Next
=new_alias
;
350 new_alias
->Next
=NULL
;
351 FirstAliasIpv4
=new_alias
;
357 Store an IPv6 address to alias.
359 \param ipv6 The IPv6 to match.
360 \param nbits The number of bits in the prefix
361 \param next A pointer to the first character after the address.
363 \retval 1 Alias added.
364 \retval 0 Ignore the line.
367 static int Alias_StoreIpv6(unsigned short *ipv6
,int nbits
,const char *next
)
370 const char *ReplaceE
;
372 struct hostalias_ipv6
*alias
;
373 struct hostalias_ipv6
*new_alias
;
374 struct hostalias_ipv6
*prev_alias
;
381 while (*Replace
==' ' || *Replace
=='\t') Replace
++;
382 if ((unsigned char)*Replace
<' ') {
385 for (str
=Replace
; *str
&& (unsigned char)*str
>=' ' ; str
++);
389 // store more restrictive range first
391 for (alias
=FirstAliasIpv6
; alias
; alias
=alias
->Next
) {
392 i
=(nbits
<alias
->NBits
) ? nbits
: alias
->NBits
;
393 if ((i
<16 || memcmp(ipv6
,alias
->Ip
,i
/16*2)==0) && ((i
%16)==0 || (ipv6
[i
/16] ^ alias
->Ip
[i
/16]) & (0xFFFFU
<<(16-i
%16)))==0) {
399 // insert into the list
400 new_alias
=malloc(sizeof(*new_alias
));
402 debuga(_("Not enough memory to store the host name aliasing directives\n"));
405 memcpy(new_alias
->Ip
,ipv6
,8*sizeof(unsigned short int));
406 new_alias
->NBits
=nbits
;
408 len
=ReplaceE
-Replace
;
411 debuga(_("Not enough memory to store the host name aliasing directives\n"));
415 memcpy(tmp
+1,Replace
,len
);
417 new_alias
->Alias
=tmp
;
421 debuga(_("Not enough memory to store the host name aliasing directives\n"));
424 sprintf(tmp
,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX
,ipv6
[0],ipv6
[1],ipv6
[2],ipv6
[3],ipv6
[4],ipv6
[5],ipv6
[6],ipv6
[7],nbits
);
425 new_alias
->Alias
=tmp
;
429 new_alias
->Next
=prev_alias
->Next
;
430 prev_alias
->Next
=new_alias
;
432 new_alias
->Next
=NULL
;
433 FirstAliasIpv6
=new_alias
;
440 Store a regular expression to match the alias.
442 \retval 1 Alias added.
443 \retval 0 Ignore the line.
446 static int Alias_StoreRegexp(char *buf
)
450 struct hostalias_regex
*alias
;
451 struct hostalias_regex
*new_alias
;
452 struct hostalias_regex
**prev_alias
;
453 const char *PcreError
;
462 for (End
=buf
; *End
&& *End
!=Delimiter
; End
++) {
465 debuga(_("Invalid NUL character found in regular expression\n"));
468 End
++; //ignore the escaped character
471 if (*End
!=Delimiter
) {
472 debuga(_("Unterminated regular expression\n"));
478 for (Replace
=End
; *Replace
==' ' || *Replace
=='\t' ; Replace
++);
479 for (End
=Replace
; *End
&& (unsigned char)*End
>' ' ; End
++);
483 new_alias
=malloc(sizeof(*new_alias
));
485 debuga(_("Not enough memory to store the host name aliasing directives\n"));
488 new_alias
->Next
=NULL
;
489 new_alias
->Re
=pcre_compile(buf
,0,&PcreError
,&ErrorOffset
,NULL
);
490 if (new_alias
->Re
==NULL
) {
491 debuga(_("Failed to compile the regular expression \"%s\": %s\n"),buf
,PcreError
);
498 debuga(_("Not enough memory to store the host name aliasing directives\n"));
499 pcre_free(new_alias
->Re
);
503 memcpy(tmp
+1,Replace
,len
);
505 new_alias
->Alias
=tmp
;
507 new_alias
->SubPartern
=false;
508 for (i
=1 ; tmp
[i
] ; i
++)
509 // both the sed \1 and the perl $1 replacement operators are accepted
510 if ((tmp
[i
]=='\\' || tmp
[i
]=='$') && isdigit(tmp
[i
+1])) {
511 new_alias
->SubPartern
=true;
516 prev_alias
=&FirstAliasRe
;
517 for (alias
=FirstAliasRe
; alias
; alias
=alias
->Next
)
518 prev_alias
=&alias
->Next
;
519 *prev_alias
=new_alias
;
526 Store an alias in the corresponding list.
528 \param buf The string to parse and store.
531 \retval -1 Error in file.
532 \retval -2 Unknown string type to store.
534 static int Alias_Store(char *buf
)
538 unsigned char ipv4
[4];
539 unsigned short int ipv6
[8];
544 if (strncasecmp(buf
,"re:",3)==0) {
546 if (Alias_StoreRegexp(buf
+3)<0)
550 debuga(_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
554 type
=extract_address_mask(buf
,&name
,ipv4
,ipv6
,&nbits
,&next
);
560 Error
=Alias_StoreName(name
,next
);
561 } else if (type
==2) {
562 Error
=Alias_StoreIpv4(ipv4
,nbits
,next
);
563 } else if (type
==3) {
564 Error
=Alias_StoreIpv6(ipv6
,nbits
,next
);
566 if (Error
<0) return(-1);
571 Read the file containing the host names to alias in the report.
573 \param Filename The name of the file.
575 void read_hostalias(const char *Filename
)
581 if (debug
) debuga(_("Reading host alias file \"%s\"\n"),Filename
);
582 fi
=fopen(Filename
,"rt");
584 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename
,strerror(errno
));
588 if ((line
=longline_create())==NULL
) {
589 debuga(_("Not enough memory to read the host name aliases\n"));
593 while ((buf
=longline_read(fi
,line
)) != NULL
) {
594 if (Alias_Store(buf
)<0) {
595 debuga(_("While reading \"%s\"\n"),Filename
);
600 longline_destroy(&line
);
604 struct hostalias_name
*alias1
;
605 struct hostalias_ipv4
*alias4
;
606 struct hostalias_ipv6
*alias6
;
608 debuga(_("List of host names to alias:\n"));
609 for (alias1
=FirstAliasName
; alias1
; alias1
=alias1
->Next
) {
610 if (alias1
->HostName_Prefix
&& alias1
->HostName_Suffix
)
611 debuga(_(" %s*%s => %s\n"),alias1
->HostName_Prefix
,alias1
->HostName_Suffix
,alias1
->Alias
);
612 else if (alias1
->HostName_Prefix
)
613 debuga(_(" %s => %s\n"),alias1
->HostName_Prefix
,alias1
->Alias
);
615 debuga(_(" *%s => %s\n"),alias1
->HostName_Suffix
,alias1
->Alias
);
617 for (alias4
=FirstAliasIpv4
; alias4
; alias4
=alias4
->Next
) {
618 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4
->Ip
[0],alias4
->Ip
[1],alias4
->Ip
[2],alias4
->Ip
[3],alias4
->NBits
,alias4
->Alias
);
620 for (alias6
=FirstAliasIpv6
; alias6
; alias6
=alias6
->Next
) {
621 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6
->Ip
[0],alias6
->Ip
[1],alias6
->Ip
[2],alias6
->Ip
[3],
622 alias6
->Ip
[4],alias6
->Ip
[5],alias6
->Ip
[6],alias6
->Ip
[7],alias6
->NBits
,alias6
->Alias
);
628 Free the memory allocated by read_hostalias().
630 void free_hostalias(void)
633 struct hostalias_name
*alias1
;
634 struct hostalias_name
*next1
;
636 for (alias1
=FirstAliasName
; alias1
; alias1
=next1
) {
638 if (alias1
->HostName_Prefix
) free((void *)alias1
->HostName_Prefix
);
639 if (alias1
->HostName_Suffix
) free((void *)alias1
->HostName_Suffix
);
640 free((void *)alias1
->Alias
);
646 struct hostalias_ipv4
*alias4
;
647 struct hostalias_ipv4
*next4
;
649 for (alias4
=FirstAliasIpv4
; alias4
; alias4
=next4
) {
651 free((void *)alias4
->Alias
);
657 struct hostalias_ipv6
*alias6
;
658 struct hostalias_ipv6
*next6
;
660 for (alias6
=FirstAliasIpv6
; alias6
; alias6
=next6
) {
662 free((void *)alias6
->Alias
);
669 struct hostalias_regex
*alias
;
670 struct hostalias_regex
*next
;
672 for (alias
=FirstAliasRe
; alias
; alias
=next
) {
674 pcre_free(alias
->Re
);
675 free((void *)alias
->Alias
);
684 Replace the host name by its alias if it is in our list.
686 \param url The host name.
688 \return The pointer to the host name or its alias.
690 static const char *alias_url_name(const char *url
,const char *next
)
692 struct hostalias_name
*alias
;
696 for (alias
=FirstAliasName
; alias
; alias
=alias
->Next
) {
697 if (len
<alias
->MinLen
) continue;
698 if (alias
->HostName_Prefix
) {
699 if (alias
->HostName_Suffix
) {
700 if (strncasecmp(url
,alias
->HostName_Prefix
,alias
->PrefixLen
)==0 &&
701 strncasecmp(url
+(len
-alias
->SuffixLen
),alias
->HostName_Suffix
,len
)==0) {
702 return(alias
->Alias
);
705 if (len
==alias
->PrefixLen
&& strncasecmp(url
,alias
->HostName_Prefix
,len
)==0) {
706 return(alias
->Alias
);
709 } else if (strncasecmp(url
+(len
-alias
->SuffixLen
),alias
->HostName_Suffix
,len
)==0) {
710 return(alias
->Alias
);
717 Replace the IPv4 address by its alias if it is in our list.
719 \param url The host name.
720 \param ipv4 The address.
722 \return The pointer to the host name or its alias.
724 static const char *alias_url_ipv4(const char *url
,unsigned char *ipv4
)
726 struct hostalias_ipv4
*alias
;
729 for (alias
=FirstAliasIpv4
; alias
; alias
=alias
->Next
) {
731 if ((len
<8 || memcmp(ipv4
,alias
->Ip
,len
/8)==0) && ((len
%8)==0 || (ipv4
[len
/8] ^ alias
->Ip
[len
/8]) & (0xFFU
<<(8-len
%8)))==0) {
732 return(alias
->Alias
);
739 Replace the IPv6 address by its alias if it is in our list.
741 \param url The host name.
742 \param ipv6 The address.
744 \return The pointer to the host name or its alias.
746 static const char *alias_url_ipv6(const char *url
,unsigned short int *ipv6
)
748 struct hostalias_ipv6
*alias
;
752 for (alias
=FirstAliasIpv6
; alias
; alias
=alias
->Next
) {
754 for (i
=len
/16-1 ; i
>=0 && ipv6
[i
]==alias
->Ip
[i
] ; i
--);
757 if (i
>=8 || len
%16==0 || ((ipv6
[i
] ^ alias
->Ip
[i
]) & (0xFFFF<<(len
-i
*16)))==0) {
758 return(alias
->Alias
);
766 Replace the host name by its alias if it is in our list.
768 \param url_ptr A pointer to the host name to match. It is replaced
769 by a pointer to the alias if a match is found.
771 \return \c True if a match is found or \c false if it failed.
773 \warning The function is not thread safe as it may return a static
776 static bool alias_url_regex(const char **url_ptr
)
778 struct hostalias_regex
*alias
;
782 int ovector
[30];//size must be a multiple of 3
783 static char Replacement
[1024];
791 for (alias
=FirstAliasRe
; alias
; alias
=alias
->Next
) {
792 nmatches
=pcre_exec(alias
->Re
,NULL
,url
,url_len
,0,0,ovector
,sizeof(ovector
)/sizeof(ovector
[0]));
794 if (nmatches
==0) nmatches
=(int)(sizeof(ovector
)/sizeof(ovector
[0]))/3*2; //only 2/3 of the vector is used by pcre_exec
795 if (nmatches
==1 || !alias
->SubPartern
) { //no subpattern to replace
796 *url_ptr
=alias
->Alias
;
800 for (i
=0 ; str
[i
] ; i
++) {
801 // both the sed \1 and the perl $1 replacement operators are accepted
802 if ((str
[i
]=='\\' || str
[i
]=='$') && isdigit(str
[i
+1])) {
804 if (sub
>=1 && sub
<=nmatches
) {
806 * ovector[sub] is the start position of the match.
807 * ovector[sub+1] is the end position of the match.
810 if (repl_idx
+ovector
[sub
+1]-ovector
[sub
]>=sizeof(Replacement
)-1) break;
811 memcpy(Replacement
+repl_idx
,url
+ovector
[sub
],ovector
[sub
+1]-ovector
[sub
]);
812 repl_idx
+=ovector
[sub
+1]-ovector
[sub
];
816 if (repl_idx
>=sizeof(Replacement
)-1) break;
817 Replacement
[repl_idx
++]=str
[i
];
819 Replacement
[repl_idx
]='\0';
820 *url_ptr
=Replacement
;
830 Find the beginning of the URL beyond the scheme://
832 \param url The url possibly containing a scheme.
834 \return The beginning of the url beyond the scheme.
836 const char *skip_scheme(const char *url
)
841 Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
842 The underscore is not part of the standard but is found in the squid logs as cache_object://.
844 for (str
=url
; *str
&& (isalnum(*str
) || *str
=='+' || *str
=='-' || *str
=='.' || *str
=='_') ; str
++);
845 if (str
[0]==':' && str
[1]=='/' && str
[2]=='/') {
847 while (*url
=='/') url
++;
853 Get the part of the URL necessary to generate the report.
855 \param url The URL as extracted from the report.
856 \param full_url \c True to keep the whole URL. If \c false,
857 the URL is truncated to only keep the host name and port number.
859 const char *process_url(const char *url
,bool full_url
)
861 static char short_url
[1024];
865 unsigned char ipv4
[4];
866 unsigned short int ipv6
[8];
869 start
=skip_scheme(url
);
871 for (i
=0 ; i
<sizeof(short_url
)-1 && start
[i
] && start
[i
]!='/' && start
[i
]!='?' ; i
++)
872 short_url
[i
]=start
[i
];
877 if (alias_url_regex(&start
)) return(start
);
880 type
=extract_address_mask(start
,NULL
,ipv4
,ipv6
,NULL
,&next
);
883 start
=alias_url_name(start
,next
);
884 } else if (type
==2) {
886 start
=alias_url_ipv4(start
,ipv4
);
887 } else if (type
==3) {
889 start
=alias_url_ipv6(start
,ipv6
);
896 Extract the host name from the URL.
898 \param url The url whose host name must be extracted.
899 \param hostname The buffer to store the host name.
900 \param hostsize The size of the host name buffer.
902 \note The function is stupid at this time. It just searches for the first slash
903 in the URL and truncates the URL there. It doesn't take the protocol into account
904 nor the port number nor any user or password information.
906 void url_hostname(const char *url
,char *hostname
,int hostsize
)
911 for (i
=0 ; i
<hostsize
&& url
[i
] && url
[i
]!='/' ; i
++)