]> git.ipfire.org Git - thirdparty/sarg.git/blob - url.c
Add support for regular expressions in aliasing the hosts
[thirdparty/sarg.git] / url.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2012
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #ifdef HAVE_PCRE_H
30 #include <pcre.h>
31 #define USE_PCRE
32 #else
33 #error "PCRE not compiled in"
34 #endif
35
/*!
One host name mask and the name to report in its place.

The mask is made of a constant prefix and/or a constant suffix. The suffix is the
text following the wildcard of the mask.
*/
struct hostalias_name
{
	struct hostalias_name *Next; //!< Following item of the list or NULL for the last item.
	int MinLen; //!< Shortest host name that can match the mask.
	int PrefixLen; //!< Number of characters in HostName_Prefix (zero if there is none).
	int SuffixLen; //!< Number of characters in HostName_Suffix (zero if there is none).
	const char *HostName_Prefix; //!< Constant text before the wildcard or NULL if the mask begins with the wildcard.
	const char *HostName_Suffix; //!< Constant text after the wildcard or NULL if the mask has no wildcard.
	const char *Alias; //!< Name to report instead of the matching host names.
};
56
/*!
One IPv4 address range and the name to report in its place.
*/
struct hostalias_ipv4
{
	struct hostalias_ipv4 *Next; //!< Following item of the list or NULL for the last item.
	unsigned char Ip[4]; //!< The four bytes of the address.
	int NBits; //!< Number of leading bits of Ip that must match (prefix length).
	const char *Alias; //!< Name to report instead of the matching addresses.
};
71
/*!
One IPv6 address range and the name to report in its place.
*/
struct hostalias_ipv6
{
	struct hostalias_ipv6 *Next; //!< Following item of the list or NULL for the last item.
	unsigned short Ip[8]; //!< The eight groups of the address.
	int NBits; //!< Number of leading bits of Ip that must match (prefix length).
	const char *Alias; //!< Name to report instead of the matching addresses.
};
86
87 #ifdef USE_PCRE
88 /*!
89 A regular expression.
90 */
91 struct hostalias_regex
92 {
93 //! The next regular expression to test.
94 struct hostalias_regex *Next;
95 //! The regular expression to match against the host name.
96 pcre *Re;
97 //! The replacement name.
98 const char *Alias;
99 };
100 #endif
101
//! Head of the list of the host name masks.
static struct hostalias_name *FirstAliasName=NULL;
//! Head of the list of the IPv4 address ranges.
static struct hostalias_ipv4 *FirstAliasIpv4=NULL;
//! Head of the list of the IPv6 address ranges.
static struct hostalias_ipv6 *FirstAliasIpv6=NULL;

#ifdef USE_PCRE
//! Head of the list of the regular expressions.
static struct hostalias_regex *FirstAliasRe=NULL;
#endif
112
113 /*!
114 Store a name to alias.
115
116 \param name The name to match including the wildcard.
117 \param next A pointer to the first character after the name.
118
119 \retval 1 Alias added.
120 \retval 0 Ignore the line.
121 \retval -1 Error.
122 */
123 static int Alias_StoreName(const char *name,const char *next)
124 {
125 const char *NameBegin;
126 const char *NameBeginE;
127 const char *NameEnd;
128 const char *NameEndE;
129 const char *Replace;
130 const char *ReplaceE;
131 const char *str;
132 char sep;
133 struct hostalias_name *alias;
134 struct hostalias_name *new_alias;
135 struct hostalias_name *prev_alias;
136 char *tmp;
137 int len;
138
139 if (*name=='#' || *name==';') return(0);
140
141 // get host name and split at the wildcard
142 NameBegin=name;
143 for (str=NameBegin ; str<next && (unsigned char)*str>' ' && *str!='*' ; str++);
144 NameBeginE=str;
145 if (NameBegin==NameBeginE) NameBegin=NULL;
146 if (str<next && *str=='*') {
147 NameEnd=++str;
148 while (str<next && (unsigned char)*str>' ') {
149 if (*str=='*') {
150 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
151 return(-1);
152 }
153 str++;
154 }
155 NameEndE=str;
156 if (NameEnd==NameEndE) {
157 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
158 return(-1);
159 }
160 } else {
161 NameEnd=NULL;
162 }
163 while (str<next && (unsigned char)*str<=' ') str++;
164 if (!NameBegin && !NameEnd) return(0);
165
166 // get the alias
167 sep=*next;
168 if (sep==' ' || sep=='\t') {
169 Replace=next;
170 while (*Replace==' ' || *Replace=='\t') Replace++;
171 if ((unsigned char)*Replace<' ') {
172 Replace=NULL;
173 } else {
174 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
175 ReplaceE=str;
176 }
177 } else
178 Replace=NULL;
179
180 if (NameBegin) {
181 len=(int)(NameBeginE-NameBegin);
182 tmp=malloc(len+1);
183 if (!tmp) {
184 debuga(_("Not enough memory to store the host name aliasing directives\n"));
185 return(-1);
186 }
187 memcpy(tmp,NameBegin,len);
188 tmp[len]='\0';
189 NameBegin=tmp;
190 }
191 if (NameEnd) {
192 len=(int)(NameEndE-NameEnd);
193 tmp=malloc(len+1);
194 if (!tmp) {
195 if (NameBegin) free((void*)NameBegin);
196 debuga(_("Not enough memory to store the host name aliasing directives\n"));
197 return(-1);
198 }
199 memcpy(tmp,NameEnd,len);
200 tmp[len]='\0';
201 NameEnd=tmp;
202 }
203
204 // ignore duplicates
205 prev_alias=NULL;
206 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
207 if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
208 ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
209 if (NameBegin) free((void*)NameBegin);
210 return(0);
211 }
212 prev_alias=alias;
213 }
214
215 // insert into the list
216 new_alias=malloc(sizeof(*new_alias));
217 if (!new_alias) {
218 if (NameBegin) free((void*)NameBegin);
219 if (NameEnd) free((void*)NameEnd);
220 debuga(_("Not enough memory to store the host name aliasing directives\n"));
221 return(-1);
222 }
223 new_alias->MinLen=0;
224 if (NameBegin) {
225 new_alias->HostName_Prefix=NameBegin;
226 new_alias->MinLen+=strlen(NameBegin);
227 new_alias->PrefixLen=strlen(NameBegin);
228 } else {
229 new_alias->HostName_Prefix=NULL;
230 new_alias->PrefixLen=0;
231 }
232 if (NameEnd) {
233 new_alias->HostName_Suffix=NameEnd;
234 new_alias->MinLen+=strlen(NameEnd)+1;
235 new_alias->SuffixLen=strlen(NameEnd);
236 } else {
237 new_alias->HostName_Suffix=NULL;
238 new_alias->SuffixLen=0;
239 }
240 if (Replace) {
241 len=(int)(ReplaceE-Replace);
242 tmp=malloc(len+2);
243 if (!tmp) {
244 debuga(_("Not enough memory to store the host name aliasing directives\n"));
245 return(-1);
246 }
247 tmp[0]=ALIAS_PREFIX;
248 memcpy(tmp+1,Replace,len);
249 tmp[len+1]='\0';
250 new_alias->Alias=tmp;
251 } else {
252 tmp=malloc(new_alias->MinLen+2);
253 if (!tmp) {
254 debuga(_("Not enough memory to store the host name aliasing directives\n"));
255 return(-1);
256 }
257 tmp[0]=ALIAS_PREFIX;
258 if (new_alias->HostName_Prefix) strcpy(tmp+1,new_alias->HostName_Prefix);
259 if (new_alias->HostName_Suffix) {
260 tmp[new_alias->PrefixLen+1]='*';
261 strcpy(tmp+new_alias->PrefixLen+2,new_alias->HostName_Suffix);
262 }
263 new_alias->Alias=tmp;
264 }
265
266 new_alias->Next=NULL;
267 if (prev_alias)
268 prev_alias->Next=new_alias;
269 else
270 FirstAliasName=new_alias;
271 return(1);
272 }
273
274 /*!
275 Store a IPv4 to alias.
276
277 \param ipv4 The IPv4 to match.
278 \param nbits The number of bits in the prefix
279 \param next A pointer to the first character after the address.
280
281 \retval 1 Alias added.
282 \retval 0 Ignore the line.
283 \retval -1 Error.
284 */
285 static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
286 {
287 const char *Replace;
288 const char *ReplaceE;
289 const char *str;
290 struct hostalias_ipv4 *alias;
291 struct hostalias_ipv4 *new_alias;
292 struct hostalias_ipv4 *prev_alias;
293 int i;
294 char *tmp;
295 int len;
296
297 // get the alias
298 Replace=next;
299 while (*Replace==' ' || *Replace=='\t') Replace++;
300 if ((unsigned char)*Replace<' ') {
301 Replace=NULL;
302 } else {
303 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
304 ReplaceE=str;
305 }
306
307 // store more restrictive range first
308 prev_alias=NULL;
309 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
310 i=(nbits<alias->NBits) ? nbits : alias->NBits;
311 if ((i<8 || memcmp(ipv4,alias->Ip,i/8)==0) && ((i%8)==0 || (ipv4[i/8] ^ alias->Ip[i/8]) & (0xFFU<<(8-i%8)))==0) {
312 break;
313 }
314 prev_alias=alias;
315 }
316
317 // insert into the list
318 new_alias=malloc(sizeof(*new_alias));
319 if (!new_alias) {
320 debuga(_("Not enough memory to store the host name aliasing directives\n"));
321 return(-1);
322 }
323 memcpy(new_alias->Ip,ipv4,4);
324 new_alias->NBits=nbits;
325 if (Replace) {
326 len=(int)(ReplaceE-Replace);
327 tmp=malloc(len+2);
328 if (!tmp) {
329 debuga(_("Not enough memory to store the host name aliasing directives\n"));
330 return(-1);
331 }
332 tmp[0]=ALIAS_PREFIX;
333 memcpy(tmp+1,Replace,len);
334 tmp[len+1]='\0';
335 new_alias->Alias=tmp;
336 } else {
337 tmp=malloc(5*4+1);
338 if (!tmp) {
339 debuga(_("Not enough memory to store the host name aliasing directives\n"));
340 return(-1);
341 }
342 sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
343 new_alias->Alias=tmp;
344 }
345
346 if (prev_alias) {
347 new_alias->Next=prev_alias->Next;
348 prev_alias->Next=new_alias;
349 } else {
350 new_alias->Next=NULL;
351 FirstAliasIpv4=new_alias;
352 }
353 return(1);
354 }
355
356 /*!
357 Store a IPv6 to alias.
358
359 \param ipv6 The IPv6 to match.
360 \param nbits The number of bits in the prefix
361 \param next A pointer to the first character after the address.
362
363 \retval 1 Alias added.
364 \retval 0 Ignore the line.
365 \retval -1 Error.
366 */
367 static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
368 {
369 const char *Replace;
370 const char *ReplaceE;
371 const char *str;
372 struct hostalias_ipv6 *alias;
373 struct hostalias_ipv6 *new_alias;
374 struct hostalias_ipv6 *prev_alias;
375 int i;
376 char *tmp;
377 int len;
378
379 // get the alias
380 Replace=next;
381 while (*Replace==' ' || *Replace=='\t') Replace++;
382 if ((unsigned char)*Replace<' ') {
383 Replace=NULL;
384 } else {
385 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
386 ReplaceE=str;
387 }
388
389 // store more restrictive range first
390 prev_alias=NULL;
391 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
392 i=(nbits<alias->NBits) ? nbits : alias->NBits;
393 if ((i<16 || memcmp(ipv6,alias->Ip,i/16*2)==0) && ((i%16)==0 || (ipv6[i/16] ^ alias->Ip[i/16]) & (0xFFFFU<<(16-i%16)))==0) {
394 break;
395 }
396 prev_alias=alias;
397 }
398
399 // insert into the list
400 new_alias=malloc(sizeof(*new_alias));
401 if (!new_alias) {
402 debuga(_("Not enough memory to store the host name aliasing directives\n"));
403 return(-1);
404 }
405 memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
406 new_alias->NBits=nbits;
407 if (Replace) {
408 len=ReplaceE-Replace;
409 tmp=malloc(len+2);
410 if (!tmp) {
411 debuga(_("Not enough memory to store the host name aliasing directives\n"));
412 return(-1);
413 }
414 tmp[0]=ALIAS_PREFIX;
415 memcpy(tmp+1,Replace,len);
416 tmp[len+1]='\0';
417 new_alias->Alias=tmp;
418 } else {
419 tmp=malloc(5*8+5);
420 if (!tmp) {
421 debuga(_("Not enough memory to store the host name aliasing directives\n"));
422 return(-1);
423 }
424 sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
425 new_alias->Alias=tmp;
426 }
427
428 if (prev_alias) {
429 new_alias->Next=prev_alias->Next;
430 prev_alias->Next=new_alias;
431 } else {
432 new_alias->Next=NULL;
433 FirstAliasIpv6=new_alias;
434 }
435 return(1);
436 }
437
438 #ifdef USE_PCRE
439 /*!
440 Store a regular expression to match the alias.
441
442 \retval 1 Alias added.
443 \retval 0 Ignore the line.
444 \retval -1 Error.
445 */
446 static int Alias_StoreRegexp(char *buf)
447 {
448 char Delimiter;
449 char *End;
450 struct hostalias_regex *alias;
451 struct hostalias_regex *new_alias;
452 struct hostalias_regex **prev_alias;
453 const char *PcreError;
454 int ErrorOffset;
455 char *Replace;
456 int len;
457 char *tmp;
458
459 // find the pattern
460 Delimiter=*buf++;
461 for (End=buf ; *End && *End!=Delimiter ; End++) {
462 if (*End=='\\') {
463 if (End[1]=='\0') {
464 debuga(_("Invalid NUL character found in regular expression\n"));
465 return(-1);
466 }
467 End++; //ignore the escaped character
468 }
469 }
470 if (*End!=Delimiter) {
471 debuga(_("Unterminated regular expression\n"));
472 return(-1);
473 }
474 *End++='\0';
475
476 // find the alias
477 for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
478 for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
479 *End='\0';
480
481 // store it
482 new_alias=malloc(sizeof(*new_alias));
483 if (!new_alias) {
484 debuga(_("Not enough memory to store the host name aliasing directives\n"));
485 return(-1);
486 }
487 new_alias->Next=NULL;
488 new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
489 if (new_alias->Re==NULL) {
490 debuga(_("Failed to compile the regular expression: %s\n"),PcreError);
491 free(new_alias);
492 return(-1);
493 }
494 len=strlen(Replace);
495 tmp=malloc(len+2);
496 if (!tmp) {
497 debuga(_("Not enough memory to store the host name aliasing directives\n"));
498 pcre_free(new_alias->Re);
499 return(-1);
500 }
501 tmp[0]=ALIAS_PREFIX;
502 memcpy(tmp+1,Replace,len);
503 tmp[len+1]='\0';
504 new_alias->Alias=tmp;
505
506 // chain it
507 prev_alias=&FirstAliasRe;
508 for (alias=FirstAliasRe ; alias ; alias=alias->Next)
509 prev_alias=&alias->Next;
510 *prev_alias=new_alias;
511
512 return(1);
513 }
514 #endif
515
/*!
Store an alias in the corresponding list.

A line beginning with "re:" (in any case) is stored as a regular expression.
Any other line is parsed by extract_address_mask() and stored as a host name,
an IPv4 address or an IPv6 address according to the type it reports (1, 2 or 3
respectively). A line of any other non negative type is ignored.

\param buf The string to parse and store.

\retval 0 No error.
\retval -1 Error in file.
*/
static int Alias_Store(char *buf)
{
	int type;
	const char *name;
	unsigned char ipv4[4];
	unsigned short int ipv6[8];
	int nbits;
	const char *next;
	int Error;

#ifdef USE_PCRE
	if (strncasecmp(buf,"re:",3)==0) {
		if (Alias_StoreRegexp(buf+3)<0)
			return(-1);
		return(0);
	}
#endif
	type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
	if (type<0) {
		return(-1);
	}

	switch (type) {
		case 1:
			Error=Alias_StoreName(name,next);
			break;
		case 2:
			Error=Alias_StoreIpv4(ipv4,nbits,next);
			break;
		case 3:
			Error=Alias_StoreIpv6(ipv6,nbits,next);
			break;
		default:
			// nothing was stored so there is no error to report
			Error=0;
			break;
	}
	if (Error<0) return(-1);
	return(0);
}
556
557 /*!
558 Read the file containing the host names to alias in the report.
559
560 \param Filename The name of the file.
561 */
562 void read_hostalias(const char *Filename)
563 {
564 FILE *fi;
565 longline line;
566 char *buf;
567
568 if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
569 fi=fopen(Filename,"rt");
570 if (!fi) {
571 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
572 exit(EXIT_FAILURE);
573 }
574
575 if ((line=longline_create())==NULL) {
576 debuga(_("Not enough memory to read the host name aliases\n"));
577 exit(EXIT_FAILURE);
578 }
579
580 while ((buf=longline_read(fi,line)) != NULL) {
581 if (Alias_Store(buf)<0) {
582 debuga(_("While reading \"%s\"\n"),Filename);
583 exit(EXIT_FAILURE);
584 }
585 }
586
587 longline_destroy(&line);
588 fclose(fi);
589
590 if (debug) {
591 struct hostalias_name *alias1;
592 struct hostalias_ipv4 *alias4;
593 struct hostalias_ipv6 *alias6;
594
595 debuga(_("List of host names to alias:\n"));
596 for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
597 if (alias1->HostName_Prefix && alias1->HostName_Suffix)
598 debuga(_(" %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
599 else if (alias1->HostName_Prefix)
600 debuga(_(" %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
601 else
602 debuga(_(" *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
603 }
604 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
605 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
606 }
607 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
608 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
609 alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
610 }
611 }
612 }
613
614 /*!
615 Free the memory allocated by read_hostalias().
616 */
617 void free_hostalias(void)
618 {
619 {
620 struct hostalias_name *alias1;
621 struct hostalias_name *next1;
622
623 for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
624 next1=alias1->Next;
625 if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
626 if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
627 free((void *)alias1->Alias);
628 free(alias1);
629 }
630 FirstAliasName=NULL;
631 }
632 {
633 struct hostalias_ipv4 *alias4;
634 struct hostalias_ipv4 *next4;
635
636 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
637 next4=alias4->Next;
638 free((void *)alias4->Alias);
639 free(alias4);
640 }
641 FirstAliasIpv4=NULL;
642 }
643 {
644 struct hostalias_ipv6 *alias6;
645 struct hostalias_ipv6 *next6;
646
647 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
648 next6=alias6->Next;
649 free((void *)alias6->Alias);
650 free(alias6);
651 }
652 FirstAliasIpv6=NULL;
653 }
654 #ifdef USE_PCRE
655 {
656 struct hostalias_regex *alias;
657 struct hostalias_regex *next;
658
659 for (alias=FirstAliasRe ; alias ; alias=next) {
660 next=alias->Next;
661 pcre_free(alias->Re);
662 free((void *)alias->Alias);
663 free(alias);
664 }
665 FirstAliasRe=NULL;
666 }
667 #endif
668 }
669
670 /*!
671 Replace the host name by its alias if it is in our list.
672
673 \param url The host name.
674
675 \return The pointer to the host name or its alias.
676 */
677 static const char *alias_url_name(const char *url,const char *next)
678 {
679 struct hostalias_name *alias;
680 int len;
681
682 len=(int)(next-url);
683 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
684 if (len<alias->MinLen) continue;
685 if (alias->HostName_Prefix) {
686 if (alias->HostName_Suffix) {
687 if (strncasecmp(url,alias->HostName_Prefix,alias->PrefixLen)==0 &&
688 strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
689 return(alias->Alias);
690 }
691 } else {
692 if (len==alias->PrefixLen && strncasecmp(url,alias->HostName_Prefix,len)==0) {
693 return(alias->Alias);
694 }
695 }
696 } else if (strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
697 return(alias->Alias);
698 }
699 }
700 return(url);
701 }
702
703 /*!
704 Replace the IPv4 address by its alias if it is in our list.
705
706 \param url The host name.
707 \param ipv4 The address.
708
709 \return The pointer to the host name or its alias.
710 */
711 static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
712 {
713 struct hostalias_ipv4 *alias;
714 int len;
715
716 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
717 len=alias->NBits;
718 if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
719 return(alias->Alias);
720 }
721 }
722 return(url);
723 }
724
725 /*!
726 Replace the IPv6 address by its alias if it is in our list.
727
728 \param url The host name.
729 \param ipv6 The address.
730
731 \return The pointer to the host name or its alias.
732 */
733 static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
734 {
735 struct hostalias_ipv6 *alias;
736 int len;
737 int i;
738
739 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
740 len=alias->NBits;
741 for (i=len/16-1 ; i>=0 && ipv6[i]==alias->Ip[i] ; i--);
742 if (i>=0) continue;
743 i=len/16;
744 if (i>=8 || len%16==0 || ((ipv6[i] ^ alias->Ip[i]) & (0xFFFF<<(len-i*16)))==0) {
745 return(alias->Alias);
746 }
747 }
748 return(url);
749 }
750
751 #ifdef USE_PCRE
752 /*!
753 Replace the host name by its alias if it is in our list.
754
755 \param url The host name.
756
757 \return The pointer to the host name or its alias.
758 */
759 static const char *alias_url_regex(const char *url)
760 {
761 struct hostalias_regex *alias;
762
763 for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
764 if (pcre_exec(alias->Re,NULL,url,strlen(url),0,0,NULL,0)==0) {
765 return(alias->Alias);
766 }
767 }
768 return(url);
769 }
770 #endif
771
/*!
Find the beginning of the URL beyond the scheme://

Any additional slash following scheme:// is skipped too. The URL is returned
unchanged if it doesn't begin with a scheme followed by ://.

\param url The url possibly containing a scheme.

\return The beginning of the url beyond the scheme.
*/
const char *skip_scheme(const char *url)
{
	const char *str;

	/*
	Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
	The underscore is not part of the standard but is found in the squid logs as cache_object://.

	The character is converted to unsigned char because passing a negative value
	to isalnum() is undefined.
	*/
	for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.' || *str=='_') ; str++);
	if (str[0]==':' && str[1]=='/' && str[2]=='/') {
		url=str+3;
		while (*url=='/') url++;
	}
	return(url);
}
794
795 /*!
796 Get the part of the URL necessary to generate the report.
797
798 \param url The URL as extracted from the report.
799 \param full_url \c True to keep the whole URL. If \c false,
800 the URL is truncated to only keep the host name and port number.
801 */
802 const char *process_url(char *url,bool full_url)
803 {
804 char *str;
805 const char *start;
806 int type;
807 const char *address;
808 unsigned char ipv4[4];
809 unsigned short int ipv6[8];
810 const char *next;
811
812 start=skip_scheme(url);
813 if (!full_url) {
814 for (str=(char *)start ; *str && *str!='/' && *str!='?' ; str++);
815 *str='\0';
816 #ifdef USE_PCRE
817 if (FirstAliasRe) {
818 address=alias_url_regex(start);
819 if (address!=start) return(address);
820 }
821 #endif
822 type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
823 if (type==1) {
824 if (FirstAliasName)
825 start=alias_url_name(start,next);
826 } else if (type==2) {
827 if (FirstAliasIpv4)
828 start=alias_url_ipv4(start,ipv4);
829 } else if (type==3) {
830 if (FirstAliasIpv6)
831 start=alias_url_ipv6(start,ipv6);
832 }
833 }
834 return(start);
835 }
836
/*!
Extract the host name from the URL.

The host name is truncated if it doesn't fit in the buffer. The buffer is always
NUL terminated unless its size is zero or negative, in which case nothing is
written.

\param url The url whose host name must be extracted.
\param hostname The buffer to store the host name.
\param hostsize The size of the host name buffer.

\note The function is stupid at this time. It just searches for the first slash
in the URL and truncates the URL there. It doesn't take the protocol into account
nor the port number nor any user or password information.
*/
void url_hostname(const char *url,char *hostname,int hostsize)
{
	int i;

	// without room for the terminating NUL, nothing can be stored
	if (hostsize<=0) return;

	hostsize--;
	for (i=0 ; i<hostsize && url[i] && url[i]!='/' ; i++)
		hostname[i]=url[i];
	hostname[i]='\0';
}
857