]> git.ipfire.org Git - thirdparty/sarg.git/blob - url.c
3b6802e5d543e010886e0cf4470baf5994c3ec11
[thirdparty/sarg.git] / url.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2013
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29 #ifdef HAVE_PCRE_H
30 #include <pcre.h>
31 #define USE_PCRE
32 #else
33 #error "PCRE not compiled in"
34 #endif
35
/*!
One entry of the list of host names to alias.

The mask of the host name is kept as two constant parts: the text found
before the wildcard and the text found after it. Either part may be
missing, in which case its pointer is NULL and its length is zero.
*/
struct hostalias_name
{
	struct hostalias_name *Next;  //!< The next host name in the list or NULL for the last item.
	int MinLen;                   //!< The minimum length of a candidate host name.
	int PrefixLen;                //!< The length of the constant part at the beginning of the mask.
	int SuffixLen;                //!< The length of the constant part at the end of the mask.
	const char *HostName_Prefix;  //!< The first part of the mask of the host name.
	const char *HostName_Suffix;  //!< The second part of the mask of the host name.
	const char *Alias;            //!< The replacement name.
};
56
/*!
One entry of the list of IPv4 addresses to alias.

The entry covers every address sharing the first \c NBits bits of \c Ip.
*/
struct hostalias_ipv4
{
	struct hostalias_ipv4 *Next;  //!< The next address in the list or NULL for the last item.
	unsigned char Ip[4];          //!< The IP address, one byte per element.
	int NBits;                    //!< The number of bits in the prefix.
	const char *Alias;            //!< The replacement name.
};
71
/*!
One entry of the list of IPv6 addresses to alias.

The entry covers every address sharing the first \c NBits bits of \c Ip.
*/
struct hostalias_ipv6
{
	struct hostalias_ipv6 *Next;  //!< The next address in the list or NULL for the last item.
	unsigned short Ip[8];         //!< The IP address, one 16 bits group per element.
	int NBits;                    //!< The number of bits in the prefix.
	const char *Alias;            //!< The replacement name.
};
86
#ifdef USE_PCRE
/*!
One entry of the list of regular expressions used to alias the host names.
*/
struct hostalias_regex
{
	struct hostalias_regex *Next;  //!< The next regular expression to test or NULL for the last item.
	pcre *Re;                      //!< The compiled regular expression to match against the host name.
	const char *Alias;             //!< The replacement name.
	bool SubPartern;               //!< \c True if the replacement name refers to at least one subpattern.
};
#endif
103
//! The first host name of the list or NULL if the list is empty.
static struct hostalias_name *FirstAliasName=NULL;
//! The first IPv4 address of the list or NULL if the list is empty.
static struct hostalias_ipv4 *FirstAliasIpv4=NULL;
//! The first IPv6 address of the list or NULL if the list is empty.
static struct hostalias_ipv6 *FirstAliasIpv6=NULL;

#ifdef USE_PCRE
//! The first regular expression of the list or NULL if the list is empty.
static struct hostalias_regex *FirstAliasRe=NULL;
#endif
114
115 /*!
116 Store a name to alias.
117
118 \param name The name to match including the wildcard.
119 \param next A pointer to the first character after the name.
120
121 \retval 1 Alias added.
122 \retval 0 Ignore the line.
123 \retval -1 Error.
124 */
125 static int Alias_StoreName(const char *name,const char *next)
126 {
127 const char *NameBegin;
128 const char *NameBeginE;
129 const char *NameEnd;
130 const char *NameEndE;
131 const char *Replace;
132 const char *ReplaceE;
133 const char *str;
134 char sep;
135 struct hostalias_name *alias;
136 struct hostalias_name *new_alias;
137 struct hostalias_name *prev_alias;
138 char *tmp;
139 int len;
140
141 if (*name=='#' || *name==';') return(0);
142
143 // get host name and split at the wildcard
144 NameBegin=name;
145 for (str=NameBegin ; str<next && (unsigned char)*str>' ' && *str!='*' ; str++);
146 NameBeginE=str;
147 if (NameBegin==NameBeginE) NameBegin=NULL;
148 if (str<next && *str=='*') {
149 NameEnd=++str;
150 while (str<next && (unsigned char)*str>' ') {
151 if (*str=='*') {
152 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
153 return(-1);
154 }
155 str++;
156 }
157 NameEndE=str;
158 if (NameEnd==NameEndE) {
159 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
160 return(-1);
161 }
162 } else {
163 NameEnd=NULL;
164 }
165 while (str<next && (unsigned char)*str<=' ') str++;
166 if (!NameBegin && !NameEnd) return(0);
167
168 // get the alias
169 sep=*next;
170 if (sep==' ' || sep=='\t') {
171 Replace=next;
172 while (*Replace==' ' || *Replace=='\t') Replace++;
173 if ((unsigned char)*Replace<' ') {
174 Replace=NULL;
175 } else {
176 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
177 ReplaceE=str;
178 }
179 } else
180 Replace=NULL;
181
182 if (NameBegin) {
183 len=(int)(NameBeginE-NameBegin);
184 tmp=malloc(len+1);
185 if (!tmp) {
186 debuga(_("Not enough memory to store the host name aliasing directives\n"));
187 return(-1);
188 }
189 memcpy(tmp,NameBegin,len);
190 tmp[len]='\0';
191 NameBegin=tmp;
192 }
193 if (NameEnd) {
194 len=(int)(NameEndE-NameEnd);
195 tmp=malloc(len+1);
196 if (!tmp) {
197 if (NameBegin) free((void*)NameBegin);
198 debuga(_("Not enough memory to store the host name aliasing directives\n"));
199 return(-1);
200 }
201 memcpy(tmp,NameEnd,len);
202 tmp[len]='\0';
203 NameEnd=tmp;
204 }
205
206 // ignore duplicates
207 prev_alias=NULL;
208 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
209 if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
210 ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
211 if (NameBegin) free((void*)NameBegin);
212 return(0);
213 }
214 prev_alias=alias;
215 }
216
217 // insert into the list
218 new_alias=malloc(sizeof(*new_alias));
219 if (!new_alias) {
220 if (NameBegin) free((void*)NameBegin);
221 if (NameEnd) free((void*)NameEnd);
222 debuga(_("Not enough memory to store the host name aliasing directives\n"));
223 return(-1);
224 }
225 new_alias->MinLen=0;
226 if (NameBegin) {
227 new_alias->HostName_Prefix=NameBegin;
228 new_alias->MinLen+=strlen(NameBegin);
229 new_alias->PrefixLen=strlen(NameBegin);
230 } else {
231 new_alias->HostName_Prefix=NULL;
232 new_alias->PrefixLen=0;
233 }
234 if (NameEnd) {
235 new_alias->HostName_Suffix=NameEnd;
236 new_alias->MinLen+=strlen(NameEnd)+1;
237 new_alias->SuffixLen=strlen(NameEnd);
238 } else {
239 new_alias->HostName_Suffix=NULL;
240 new_alias->SuffixLen=0;
241 }
242 if (Replace) {
243 len=(int)(ReplaceE-Replace);
244 tmp=malloc(len+2);
245 if (!tmp) {
246 debuga(_("Not enough memory to store the host name aliasing directives\n"));
247 return(-1);
248 }
249 tmp[0]=ALIAS_PREFIX;
250 memcpy(tmp+1,Replace,len);
251 tmp[len+1]='\0';
252 new_alias->Alias=tmp;
253 } else {
254 tmp=malloc(new_alias->MinLen+2);
255 if (!tmp) {
256 debuga(_("Not enough memory to store the host name aliasing directives\n"));
257 return(-1);
258 }
259 tmp[0]=ALIAS_PREFIX;
260 if (new_alias->HostName_Prefix) strcpy(tmp+1,new_alias->HostName_Prefix);
261 if (new_alias->HostName_Suffix) {
262 tmp[new_alias->PrefixLen+1]='*';
263 strcpy(tmp+new_alias->PrefixLen+2,new_alias->HostName_Suffix);
264 }
265 new_alias->Alias=tmp;
266 }
267
268 new_alias->Next=NULL;
269 if (prev_alias)
270 prev_alias->Next=new_alias;
271 else
272 FirstAliasName=new_alias;
273 return(1);
274 }
275
276 /*!
277 Store a IPv4 to alias.
278
279 \param ipv4 The IPv4 to match.
280 \param nbits The number of bits in the prefix
281 \param next A pointer to the first character after the address.
282
283 \retval 1 Alias added.
284 \retval 0 Ignore the line.
285 \retval -1 Error.
286 */
287 static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
288 {
289 const char *Replace;
290 const char *ReplaceE;
291 const char *str;
292 struct hostalias_ipv4 *alias;
293 struct hostalias_ipv4 *new_alias;
294 struct hostalias_ipv4 *prev_alias;
295 int i;
296 char *tmp;
297 int len;
298
299 // get the alias
300 Replace=next;
301 while (*Replace==' ' || *Replace=='\t') Replace++;
302 if ((unsigned char)*Replace<' ') {
303 Replace=NULL;
304 } else {
305 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
306 ReplaceE=str;
307 }
308
309 // store more restrictive range first
310 prev_alias=NULL;
311 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
312 i=(nbits<alias->NBits) ? nbits : alias->NBits;
313 if ((i<8 || memcmp(ipv4,alias->Ip,i/8)==0) && ((i%8)==0 || (ipv4[i/8] ^ alias->Ip[i/8]) & (0xFFU<<(8-i%8)))==0) {
314 break;
315 }
316 prev_alias=alias;
317 }
318
319 // insert into the list
320 new_alias=malloc(sizeof(*new_alias));
321 if (!new_alias) {
322 debuga(_("Not enough memory to store the host name aliasing directives\n"));
323 return(-1);
324 }
325 memcpy(new_alias->Ip,ipv4,4);
326 new_alias->NBits=nbits;
327 if (Replace) {
328 len=(int)(ReplaceE-Replace);
329 tmp=malloc(len+2);
330 if (!tmp) {
331 debuga(_("Not enough memory to store the host name aliasing directives\n"));
332 return(-1);
333 }
334 tmp[0]=ALIAS_PREFIX;
335 memcpy(tmp+1,Replace,len);
336 tmp[len+1]='\0';
337 new_alias->Alias=tmp;
338 } else {
339 tmp=malloc(5*4+1);
340 if (!tmp) {
341 debuga(_("Not enough memory to store the host name aliasing directives\n"));
342 return(-1);
343 }
344 sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
345 new_alias->Alias=tmp;
346 }
347
348 if (prev_alias) {
349 new_alias->Next=prev_alias->Next;
350 prev_alias->Next=new_alias;
351 } else {
352 new_alias->Next=NULL;
353 FirstAliasIpv4=new_alias;
354 }
355 return(1);
356 }
357
358 /*!
359 Store a IPv6 to alias.
360
361 \param ipv6 The IPv6 to match.
362 \param nbits The number of bits in the prefix
363 \param next A pointer to the first character after the address.
364
365 \retval 1 Alias added.
366 \retval 0 Ignore the line.
367 \retval -1 Error.
368 */
369 static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
370 {
371 const char *Replace;
372 const char *ReplaceE;
373 const char *str;
374 struct hostalias_ipv6 *alias;
375 struct hostalias_ipv6 *new_alias;
376 struct hostalias_ipv6 *prev_alias;
377 int i;
378 char *tmp;
379 int len;
380
381 // get the alias
382 Replace=next;
383 while (*Replace==' ' || *Replace=='\t') Replace++;
384 if ((unsigned char)*Replace<' ') {
385 Replace=NULL;
386 } else {
387 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
388 ReplaceE=str;
389 }
390
391 // store more restrictive range first
392 prev_alias=NULL;
393 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
394 i=(nbits<alias->NBits) ? nbits : alias->NBits;
395 if ((i<16 || memcmp(ipv6,alias->Ip,i/16*2)==0) && ((i%16)==0 || (ipv6[i/16] ^ alias->Ip[i/16]) & (0xFFFFU<<(16-i%16)))==0) {
396 break;
397 }
398 prev_alias=alias;
399 }
400
401 // insert into the list
402 new_alias=malloc(sizeof(*new_alias));
403 if (!new_alias) {
404 debuga(_("Not enough memory to store the host name aliasing directives\n"));
405 return(-1);
406 }
407 memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
408 new_alias->NBits=nbits;
409 if (Replace) {
410 len=ReplaceE-Replace;
411 tmp=malloc(len+2);
412 if (!tmp) {
413 debuga(_("Not enough memory to store the host name aliasing directives\n"));
414 return(-1);
415 }
416 tmp[0]=ALIAS_PREFIX;
417 memcpy(tmp+1,Replace,len);
418 tmp[len+1]='\0';
419 new_alias->Alias=tmp;
420 } else {
421 tmp=malloc(5*8+5);
422 if (!tmp) {
423 debuga(_("Not enough memory to store the host name aliasing directives\n"));
424 return(-1);
425 }
426 sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
427 new_alias->Alias=tmp;
428 }
429
430 if (prev_alias) {
431 new_alias->Next=prev_alias->Next;
432 prev_alias->Next=new_alias;
433 } else {
434 new_alias->Next=NULL;
435 FirstAliasIpv6=new_alias;
436 }
437 return(1);
438 }
439
#ifdef USE_PCRE
/*!
Store a regular expression to match the alias.

The first character of the buffer is the delimiter of the regular
expression. The expression runs up to the next unescaped occurrence of
that delimiter. The alias is the first word after the closing delimiter.

\param buf The text following the "re:" marker. The buffer is modified
as NUL characters are written at the end of the expression and at the end
of the alias.

\retval 1 Alias added.
\retval -1 Error.
*/
static int Alias_StoreRegexp(char *buf)
{
	char Delimiter;
	char *End;
	struct hostalias_regex *alias;
	struct hostalias_regex *new_alias;
	struct hostalias_regex **prev_alias;
	const char *PcreError;
	int ErrorOffset;
	char *Replace;
	int len;
	char *tmp;
	int i;

	// find the pattern
	Delimiter=*buf;
	if (Delimiter=='\0') {
		// nothing after the marker: don't walk beyond the end of the buffer
		debuga(_("Unterminated regular expression\n"));
		return(-1);
	}
	buf++;
	for (End=buf ; *End && *End!=Delimiter ; End++) {
		if (*End=='\\') {
			if (End[1]=='\0') {
				debuga(_("Invalid NUL character found in regular expression\n"));
				return(-1);
			}
			End++; //ignore the escaped character
		}
	}
	if (*End!=Delimiter) {
		debuga(_("Unterminated regular expression\n"));
		return(-1);
	}
	*End++='\0';

	// find the alias
	for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
	for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
	*End='\0';

	// store it
	new_alias=malloc(sizeof(*new_alias));
	if (!new_alias) {
		debuga(_("Not enough memory to store the host name aliasing directives\n"));
		return(-1);
	}
	new_alias->Next=NULL;
	new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
	if (new_alias->Re==NULL) {
		debuga(_("Failed to compile the regular expression \"%s\": %s\n"),buf,PcreError);
		free(new_alias);
		return(-1);
	}
	len=strlen(Replace);
	tmp=malloc(len+2);
	if (!tmp) {
		debuga(_("Not enough memory to store the host name aliasing directives\n"));
		pcre_free(new_alias->Re);
		free(new_alias);
		return(-1);
	}
	tmp[0]=ALIAS_PREFIX;
	memcpy(tmp+1,Replace,len);
	tmp[len+1]='\0';
	new_alias->Alias=tmp;

	new_alias->SubPartern=false;
	for (i=1 ; tmp[i] ; i++) {
		// both the sed \1 and the perl $1 replacement operators are accepted
		if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit((unsigned char)tmp[i+1])) {
			new_alias->SubPartern=true;
			break;
		}
	}

	// chain it at the end of the list
	prev_alias=&FirstAliasRe;
	for (alias=FirstAliasRe ; alias ; alias=alias->Next)
		prev_alias=&alias->Next;
	*prev_alias=new_alias;

	return(1);
}
#endif
526
/*!
Store an alias in the corresponding list.

A line starting with "re:" (in any case) is a regular expression. Any other
line is parsed by extract_address_mask() and dispatched according to the
type it returns: 1 for a host name, 2 for an IPv4 address and 3 for an IPv6
address. A line of any other non negative type is ignored.

\param buf The string to parse and store.

\retval 0 No error.
\retval -1 Error in file.
*/
static int Alias_Store(char *buf)
{
	int type;
	const char *name;
	unsigned char ipv4[4];
	unsigned short int ipv6[8];
	int nbits;
	const char *next;
	int Error=0; //nothing stored is not an error

	if (strncasecmp(buf,"re:",3)==0) {
#ifdef USE_PCRE
		if (Alias_StoreRegexp(buf+3)<0)
			return(-1);
		return(0);
#else
		debuga(_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
		return(-1);
#endif
	}
	type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
	if (type<0) {
		return(-1);
	}

	if (type==1) {
		Error=Alias_StoreName(name,next);
	} else if (type==2) {
		Error=Alias_StoreIpv4(ipv4,nbits,next);
	} else if (type==3) {
		Error=Alias_StoreIpv6(ipv6,nbits,next);
	}
	if (Error<0) return(-1);
	return(0);
}
570
571 /*!
572 Read the file containing the host names to alias in the report.
573
574 \param Filename The name of the file.
575 */
576 void read_hostalias(const char *Filename)
577 {
578 FILE *fi;
579 longline line;
580 char *buf;
581
582 if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
583 fi=fopen(Filename,"rt");
584 if (!fi) {
585 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
586 exit(EXIT_FAILURE);
587 }
588
589 if ((line=longline_create())==NULL) {
590 debuga(_("Not enough memory to read the host name aliases\n"));
591 exit(EXIT_FAILURE);
592 }
593
594 while ((buf=longline_read(fi,line)) != NULL) {
595 if (Alias_Store(buf)<0) {
596 debuga(_("While reading \"%s\"\n"),Filename);
597 exit(EXIT_FAILURE);
598 }
599 }
600
601 longline_destroy(&line);
602 fclose(fi);
603
604 if (debug) {
605 struct hostalias_name *alias1;
606 struct hostalias_ipv4 *alias4;
607 struct hostalias_ipv6 *alias6;
608
609 debuga(_("List of host names to alias:\n"));
610 for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
611 if (alias1->HostName_Prefix && alias1->HostName_Suffix)
612 debuga(_(" %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
613 else if (alias1->HostName_Prefix)
614 debuga(_(" %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
615 else
616 debuga(_(" *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
617 }
618 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
619 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
620 }
621 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
622 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
623 alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
624 }
625 }
626 }
627
628 /*!
629 Free the memory allocated by read_hostalias().
630 */
631 void free_hostalias(void)
632 {
633 {
634 struct hostalias_name *alias1;
635 struct hostalias_name *next1;
636
637 for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
638 next1=alias1->Next;
639 if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
640 if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
641 free((void *)alias1->Alias);
642 free(alias1);
643 }
644 FirstAliasName=NULL;
645 }
646 {
647 struct hostalias_ipv4 *alias4;
648 struct hostalias_ipv4 *next4;
649
650 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
651 next4=alias4->Next;
652 free((void *)alias4->Alias);
653 free(alias4);
654 }
655 FirstAliasIpv4=NULL;
656 }
657 {
658 struct hostalias_ipv6 *alias6;
659 struct hostalias_ipv6 *next6;
660
661 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
662 next6=alias6->Next;
663 free((void *)alias6->Alias);
664 free(alias6);
665 }
666 FirstAliasIpv6=NULL;
667 }
668 #ifdef USE_PCRE
669 {
670 struct hostalias_regex *alias;
671 struct hostalias_regex *next;
672
673 for (alias=FirstAliasRe ; alias ; alias=next) {
674 next=alias->Next;
675 pcre_free(alias->Re);
676 free((void *)alias->Alias);
677 free(alias);
678 }
679 FirstAliasRe=NULL;
680 }
681 #endif
682 }
683
684 /*!
685 Replace the host name by its alias if it is in our list.
686
687 \param url The host name.
688
689 \return The pointer to the host name or its alias.
690 */
691 static const char *alias_url_name(const char *url,const char *next)
692 {
693 struct hostalias_name *alias;
694 int len;
695
696 len=(int)(next-url);
697 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
698 if (len<alias->MinLen) continue;
699 if (alias->HostName_Prefix) {
700 if (alias->HostName_Suffix) {
701 if (strncasecmp(url,alias->HostName_Prefix,alias->PrefixLen)==0 &&
702 strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
703 return(alias->Alias);
704 }
705 } else {
706 if (len==alias->PrefixLen && strncasecmp(url,alias->HostName_Prefix,len)==0) {
707 return(alias->Alias);
708 }
709 }
710 } else if (strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
711 return(alias->Alias);
712 }
713 }
714 return(url);
715 }
716
717 /*!
718 Replace the IPv4 address by its alias if it is in our list.
719
720 \param url The host name.
721 \param ipv4 The address.
722
723 \return The pointer to the host name or its alias.
724 */
725 static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
726 {
727 struct hostalias_ipv4 *alias;
728 int len;
729
730 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
731 len=alias->NBits;
732 if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
733 return(alias->Alias);
734 }
735 }
736 return(url);
737 }
738
739 /*!
740 Replace the IPv6 address by its alias if it is in our list.
741
742 \param url The host name.
743 \param ipv6 The address.
744
745 \return The pointer to the host name or its alias.
746 */
747 static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
748 {
749 struct hostalias_ipv6 *alias;
750 int len;
751 int i;
752
753 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
754 len=alias->NBits;
755 for (i=len/16-1 ; i>=0 && ipv6[i]==alias->Ip[i] ; i--);
756 if (i>=0) continue;
757 i=len/16;
758 if (i>=8 || len%16==0 || ((ipv6[i] ^ alias->Ip[i]) & (0xFFFF<<(len-i*16)))==0) {
759 return(alias->Alias);
760 }
761 }
762 return(url);
763 }
764
#ifdef USE_PCRE
/*!
Replace the host name by its alias if it is in our list.

The regular expressions are tried in the order of the list and the first
one to match is used. If its alias refers to subpatterns with \\1 or $1,
the references are replaced by the text captured in the host name.

\param url_ptr A pointer to the host name to match. It is replaced
by a pointer to the alias if a match is found.

\return \c True if a match is found or \c false if it failed.

\warning The function is not thread safe as it may return a static
internal buffer.
*/
static bool alias_url_regex(const char **url_ptr)
{
	struct hostalias_regex *alias;
	int nmatches;
	const char *url;
	int url_len;
	int ovector[30];//size must be a multiple of 3
	static char Replacement[1024];
	const char *str;
	int i;
	int sub;
	int sub_len;
	int repl_idx;

	url=*url_ptr;
	url_len=strlen(url);
	for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
		nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0]));
		if (nmatches<0) continue;

		//the vector was too small: only the pairs in the first 2/3 of the vector are set by pcre_exec
		if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3;
		if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace
			*url_ptr=alias->Alias;
			return(true);
		}

		repl_idx=0;
		str=alias->Alias;
		for (i=0 ; str[i] ; i++) {
			// both the sed \1 and the perl $1 replacement operators are accepted
			if ((str[i]=='\\' || str[i]=='$') && isdigit((unsigned char)str[i+1])) {
				sub=str[++i]-'0';
				// pcre_exec returns the highest captured subpattern plus one
				// so the pairs from nmatches onward are not set
				if (sub>=1 && sub<nmatches) {
					/*
					 * ovector[2*sub] is the start position of the match.
					 * ovector[2*sub+1] is the end position of the match.
					 */
					sub<<=1;
					sub_len=ovector[sub+1]-ovector[sub];
					if (ovector[sub]<0 || sub_len<=0) continue; //subpattern not set or empty
					if (repl_idx+sub_len>=(int)sizeof(Replacement)-1) break;
					memcpy(Replacement+repl_idx,url+ovector[sub],sub_len);
					repl_idx+=sub_len;
					continue;
				}
			}
			if (repl_idx>=(int)sizeof(Replacement)-1) break;
			Replacement[repl_idx++]=str[i];
		}
		Replacement[repl_idx]='\0';
		*url_ptr=Replacement;
		return(true);
	}
	return(false);
}
#endif
829
/*!
Find the beginning of the URL beyond the scheme://

Any additional slash following the scheme:// is skipped too. The URL is
returned unchanged if it doesn't begin with a scheme.

\param url The url possibly containing a scheme.

\return The beginning of the url beyond the scheme.
*/
const char *skip_scheme(const char *url)
{
	const char *str;

	/*
	Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
	The underscore is not part of the standard but is found in the squid logs as cache_object://.

	The character is converted to unsigned char as isalnum is not defined for negative values.
	*/
	for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.' || *str=='_') ; str++);
	if (str[0]==':' && str[1]=='/' && str[2]=='/') {
		url=str+3;
		while (*url=='/') url++;
	}
	return(url);
}
852
853 /*!
854 Get the part of the URL necessary to generate the report.
855
856 \param url The URL as extracted from the report.
857 \param full_url \c True to keep the whole URL. If \c false,
858 the URL is truncated to only keep the host name and port number.
859 */
860 const char *process_url(const char *url,bool full_url)
861 {
862 static char short_url[1024];
863 int i;
864 const char *start;
865 int type;
866 unsigned char ipv4[4];
867 unsigned short int ipv6[8];
868 const char *next;
869
870 start=skip_scheme(url);
871 if (!full_url) {
872 for (i=0 ; i<sizeof(short_url)-1 && start[i] && start[i]!='/' && start[i]!='?' ; i++)
873 short_url[i]=start[i];
874 short_url[i]='\0';
875 start=short_url;
876 #ifdef USE_PCRE
877 if (FirstAliasRe) {
878 if (alias_url_regex(&start)) return(start);
879 }
880 #endif
881 type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next);
882 if (type==1) {
883 if (FirstAliasName)
884 start=alias_url_name(start,next);
885 } else if (type==2) {
886 if (FirstAliasIpv4)
887 start=alias_url_ipv4(start,ipv4);
888 } else if (type==3) {
889 if (FirstAliasIpv6)
890 start=alias_url_ipv6(start,ipv6);
891 }
892 }
893 return(start);
894 }
895
/*!
Extract the host name from the URL.

The host name is truncated if it doesn't fit in the buffer. Nothing is
written if the size of the buffer is zero or negative.

\param url The url whose host name must be extracted.
\param hostname The buffer to store the host name.
\param hostsize The size of the host name buffer.

\note The function is stupid at this time. It just searches for the first slash
in the URL and truncates the URL there. It doesn't take the protocol into account
nor the port number nor any user or password information.
*/
void url_hostname(const char *url,char *hostname,int hostsize)
{
	int i;

	if (hostsize<=0) return; //not even room for the terminating NUL

	hostsize--; //keep one byte for the terminating NUL
	for (i=0 ; i<hostsize && url[i] && url[i]!='/' ; i++)
		hostname[i]=url[i];
	hostname[i]='\0';
}
916