]> git.ipfire.org Git - thirdparty/sarg.git/blame - url.c
Remove trailing spaces in every source file
[thirdparty/sarg.git] / url.c
CommitLineData
22715352
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
61d965f3 3 * 1998, 2012
22715352
FM
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
6e24f222
FM
29#ifdef HAVE_PCRE_H
30#include <pcre.h>
31#define USE_PCRE
32#else
33#error "PCRE not compiled in"
34#endif
22715352
FM
35
/*!
One entry of the list of host name masks with the name to report in place
of the matching hosts.

The mask is stored as the constant text found before the wildcard and the
constant text found after it.
*/
struct hostalias_name
{
	struct hostalias_name *Next;  //!< The next host name in the list or NULL for the last item.
	int MinLen;                   //!< The minimum length of a candidate host name.
	int PrefixLen;                //!< The length of the constant part at the beginning of the mask.
	int SuffixLen;                //!< The length of the constant part at the end of the mask.
	const char *HostName_Prefix;  //!< The first part of the mask of the host name or NULL if there is none.
	const char *HostName_Suffix;  //!< The second part of the mask of the host name or NULL if there is none.
	const char *Alias;            //!< The replacement name.
};
56
5207d9f8
FM
/*!
One entry of the list of IPv4 ranges with the name to report in place of
the addresses of the range.
*/
struct hostalias_ipv4
{
	struct hostalias_ipv4 *Next;  //!< The next range in the list or NULL for the last item.
	unsigned char Ip[4];          //!< The IP address, one byte per element.
	int NBits;                    //!< The number of bits in the prefix.
	const char *Alias;            //!< The replacement name.
};
71
/*!
One entry of the list of IPv6 ranges with the name to report in place of
the addresses of the range.
*/
struct hostalias_ipv6
{
	struct hostalias_ipv6 *Next;  //!< The next range in the list or NULL for the last item.
	unsigned short Ip[8];         //!< The IP address, one 16 bits group per element.
	int NBits;                    //!< The number of bits in the prefix.
	const char *Alias;            //!< The replacement name.
};
86
6e24f222
FM
#ifdef USE_PCRE
/*!
One entry of the list of regular expressions with the name to report in
place of the matching hosts.
*/
struct hostalias_regex
{
	struct hostalias_regex *Next;  //!< The next regular expression to test.
	pcre *Re;                      //!< The compiled regular expression to match against the host name.
	const char *Alias;             //!< The replacement name.
	bool SubPartern;               //!< \c True if the replacement name refers to at least one subpattern.
};
#endif
103
//! Head of the list of the host name masks.
static struct hostalias_name *FirstAliasName = NULL;
//! Head of the list of the IPv4 ranges.
static struct hostalias_ipv4 *FirstAliasIpv4 = NULL;
//! Head of the list of the IPv6 ranges.
static struct hostalias_ipv6 *FirstAliasIpv6 = NULL;

#ifdef USE_PCRE
//! Head of the list of the regular expressions.
static struct hostalias_regex *FirstAliasRe = NULL;
#endif
114
22715352 115/*!
5207d9f8 116 Store a name to alias.
22715352 117
5207d9f8
FM
118 \param name The name to match including the wildcard.
119 \param next A pointer to the first character after the name.
120
121 \retval 1 Alias added.
122 \retval 0 Ignore the line.
123 \retval -1 Error.
124 */
7819e0d5 125static int Alias_StoreName(const char *name,const char *next)
22715352 126{
7819e0d5
FM
127 const char *NameBegin;
128 const char *NameBeginE;
129 const char *NameEnd;
130 const char *NameEndE;
131 const char *Replace;
132 const char *ReplaceE;
133 const char *str;
5207d9f8
FM
134 char sep;
135 struct hostalias_name *alias;
136 struct hostalias_name *new_alias;
137 struct hostalias_name *prev_alias;
67a93701 138 char *tmp;
7819e0d5 139 int len;
22715352 140
5207d9f8 141 if (*name=='#' || *name==';') return(0);
22715352 142
5207d9f8
FM
143 // get host name and split at the wildcard
144 NameBegin=name;
7819e0d5
FM
145 for (str=NameBegin ; str<next && (unsigned char)*str>' ' && *str!='*' ; str++);
146 NameBeginE=str;
147 if (NameBegin==NameBeginE) NameBegin=NULL;
148 if (str<next && *str=='*') {
149 NameEnd=++str;
150 while (str<next && (unsigned char)*str>' ') {
5207d9f8 151 if (*str=='*') {
c4633554 152 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
5207d9f8 153 return(-1);
22715352 154 }
5207d9f8 155 str++;
22715352 156 }
7819e0d5
FM
157 NameEndE=str;
158 if (NameEnd==NameEndE) {
c4633554 159 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
7819e0d5
FM
160 return(-1);
161 }
162 } else {
5207d9f8 163 NameEnd=NULL;
5207d9f8 164 }
7819e0d5 165 while (str<next && (unsigned char)*str<=' ') str++;
5207d9f8 166 if (!NameBegin && !NameEnd) return(0);
22715352 167
5207d9f8 168 // get the alias
7819e0d5 169 sep=*next;
5207d9f8
FM
170 if (sep==' ' || sep=='\t') {
171 Replace=next;
22715352
FM
172 while (*Replace==' ' || *Replace=='\t') Replace++;
173 if ((unsigned char)*Replace<' ') {
174 Replace=NULL;
175 } else {
176 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 177 ReplaceE=str;
22715352 178 }
5207d9f8
FM
179 } else
180 Replace=NULL;
22715352 181
7819e0d5
FM
182 if (NameBegin) {
183 len=(int)(NameBeginE-NameBegin);
184 tmp=malloc(len+1);
185 if (!tmp) {
c4633554 186 debuga(_("Not enough memory to store the host name aliasing directives\n"));
7819e0d5
FM
187 return(-1);
188 }
189 memcpy(tmp,NameBegin,len);
190 tmp[len]='\0';
191 NameBegin=tmp;
192 }
193 if (NameEnd) {
194 len=(int)(NameEndE-NameEnd);
195 tmp=malloc(len+1);
196 if (!tmp) {
197 if (NameBegin) free((void*)NameBegin);
c4633554 198 debuga(_("Not enough memory to store the host name aliasing directives\n"));
7819e0d5
FM
199 return(-1);
200 }
201 memcpy(tmp,NameEnd,len);
202 tmp[len]='\0';
203 NameEnd=tmp;
204 }
bd43d81f 205
5207d9f8
FM
206 // ignore duplicates
207 prev_alias=NULL;
208 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
209 if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
210 ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
7819e0d5 211 if (NameBegin) free((void*)NameBegin);
5207d9f8 212 return(0);
22715352 213 }
5207d9f8
FM
214 prev_alias=alias;
215 }
22715352 216
5207d9f8
FM
217 // insert into the list
218 new_alias=malloc(sizeof(*new_alias));
219 if (!new_alias) {
7819e0d5
FM
220 if (NameBegin) free((void*)NameBegin);
221 if (NameEnd) free((void*)NameEnd);
c4633554 222 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
223 return(-1);
224 }
225 new_alias->MinLen=0;
226 if (NameBegin) {
7819e0d5 227 new_alias->HostName_Prefix=NameBegin;
5207d9f8
FM
228 new_alias->MinLen+=strlen(NameBegin);
229 new_alias->PrefixLen=strlen(NameBegin);
230 } else {
231 new_alias->HostName_Prefix=NULL;
232 new_alias->PrefixLen=0;
233 }
234 if (NameEnd) {
7819e0d5 235 new_alias->HostName_Suffix=NameEnd;
5207d9f8
FM
236 new_alias->MinLen+=strlen(NameEnd)+1;
237 new_alias->SuffixLen=strlen(NameEnd);
238 } else {
239 new_alias->HostName_Suffix=NULL;
240 new_alias->SuffixLen=0;
241 }
242 if (Replace) {
7819e0d5
FM
243 len=(int)(ReplaceE-Replace);
244 tmp=malloc(len+2);
5207d9f8 245 if (!tmp) {
c4633554 246 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8 247 return(-1);
22715352 248 }
5207d9f8 249 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
250 memcpy(tmp+1,Replace,len);
251 tmp[len+1]='\0';
5207d9f8
FM
252 new_alias->Alias=tmp;
253 } else {
254 tmp=malloc(new_alias->MinLen+2);
255 if (!tmp) {
c4633554 256 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
257 return(-1);
258 }
259 tmp[0]=ALIAS_PREFIX;
260 if (new_alias->HostName_Prefix) strcpy(tmp+1,new_alias->HostName_Prefix);
261 if (new_alias->HostName_Suffix) {
262 tmp[new_alias->PrefixLen+1]='*';
263 strcpy(tmp+new_alias->PrefixLen+2,new_alias->HostName_Suffix);
264 }
265 new_alias->Alias=tmp;
266 }
bd43d81f 267
5207d9f8
FM
268 new_alias->Next=NULL;
269 if (prev_alias)
270 prev_alias->Next=new_alias;
271 else
272 FirstAliasName=new_alias;
273 return(1);
274}
275
276/*!
277 Store a IPv4 to alias.
278
279 \param ipv4 The IPv4 to match.
280 \param nbits The number of bits in the prefix
281 \param next A pointer to the first character after the address.
282
283 \retval 1 Alias added.
284 \retval 0 Ignore the line.
285 \retval -1 Error.
286 */
7819e0d5 287static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
5207d9f8 288{
7819e0d5
FM
289 const char *Replace;
290 const char *ReplaceE;
291 const char *str;
5207d9f8
FM
292 struct hostalias_ipv4 *alias;
293 struct hostalias_ipv4 *new_alias;
294 struct hostalias_ipv4 *prev_alias;
295 int i;
296 char *tmp;
7819e0d5 297 int len;
5207d9f8
FM
298
299 // get the alias
300 Replace=next;
301 while (*Replace==' ' || *Replace=='\t') Replace++;
302 if ((unsigned char)*Replace<' ') {
303 Replace=NULL;
304 } else {
305 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 306 ReplaceE=str;
5207d9f8
FM
307 }
308
309 // store more restrictive range first
310 prev_alias=NULL;
311 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
312 i=(nbits<alias->NBits) ? nbits : alias->NBits;
313 if ((i<8 || memcmp(ipv4,alias->Ip,i/8)==0) && ((i%8)==0 || (ipv4[i/8] ^ alias->Ip[i/8]) & (0xFFU<<(8-i%8)))==0) {
314 break;
315 }
316 prev_alias=alias;
317 }
318
319 // insert into the list
320 new_alias=malloc(sizeof(*new_alias));
321 if (!new_alias) {
c4633554 322 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
323 return(-1);
324 }
325 memcpy(new_alias->Ip,ipv4,4);
326 new_alias->NBits=nbits;
327 if (Replace) {
7819e0d5
FM
328 len=(int)(ReplaceE-Replace);
329 tmp=malloc(len+2);
5207d9f8 330 if (!tmp) {
c4633554 331 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
332 return(-1);
333 }
334 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
335 memcpy(tmp+1,Replace,len);
336 tmp[len+1]='\0';
5207d9f8
FM
337 new_alias->Alias=tmp;
338 } else {
339 tmp=malloc(5*4+1);
340 if (!tmp) {
c4633554 341 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8 342 return(-1);
22715352 343 }
5207d9f8
FM
344 sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
345 new_alias->Alias=tmp;
346 }
bd43d81f 347
5207d9f8
FM
348 if (prev_alias) {
349 new_alias->Next=prev_alias->Next;
350 prev_alias->Next=new_alias;
351 } else {
22715352 352 new_alias->Next=NULL;
5207d9f8
FM
353 FirstAliasIpv4=new_alias;
354 }
355 return(1);
356}
357
358/*!
359 Store a IPv6 to alias.
360
361 \param ipv6 The IPv6 to match.
362 \param nbits The number of bits in the prefix
363 \param next A pointer to the first character after the address.
364
365 \retval 1 Alias added.
366 \retval 0 Ignore the line.
367 \retval -1 Error.
368 */
7819e0d5 369static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
5207d9f8 370{
7819e0d5
FM
371 const char *Replace;
372 const char *ReplaceE;
373 const char *str;
5207d9f8
FM
374 struct hostalias_ipv6 *alias;
375 struct hostalias_ipv6 *new_alias;
376 struct hostalias_ipv6 *prev_alias;
377 int i;
378 char *tmp;
7819e0d5 379 int len;
5207d9f8
FM
380
381 // get the alias
382 Replace=next;
383 while (*Replace==' ' || *Replace=='\t') Replace++;
384 if ((unsigned char)*Replace<' ') {
385 Replace=NULL;
386 } else {
387 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 388 ReplaceE=str;
5207d9f8
FM
389 }
390
391 // store more restrictive range first
392 prev_alias=NULL;
393 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
394 i=(nbits<alias->NBits) ? nbits : alias->NBits;
395 if ((i<16 || memcmp(ipv6,alias->Ip,i/16*2)==0) && ((i%16)==0 || (ipv6[i/16] ^ alias->Ip[i/16]) & (0xFFFFU<<(16-i%16)))==0) {
396 break;
397 }
398 prev_alias=alias;
399 }
400
401 // insert into the list
402 new_alias=malloc(sizeof(*new_alias));
403 if (!new_alias) {
c4633554 404 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
405 return(-1);
406 }
0ec4b481 407 memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
5207d9f8
FM
408 new_alias->NBits=nbits;
409 if (Replace) {
7819e0d5
FM
410 len=ReplaceE-Replace;
411 tmp=malloc(len+2);
5207d9f8 412 if (!tmp) {
c4633554 413 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
414 return(-1);
415 }
416 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
417 memcpy(tmp+1,Replace,len);
418 tmp[len+1]='\0';
5207d9f8
FM
419 new_alias->Alias=tmp;
420 } else {
0ec4b481 421 tmp=malloc(5*8+5);
5207d9f8 422 if (!tmp) {
c4633554 423 debuga(_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
424 return(-1);
425 }
426 sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
427 new_alias->Alias=tmp;
428 }
bd43d81f 429
5207d9f8
FM
430 if (prev_alias) {
431 new_alias->Next=prev_alias->Next;
432 prev_alias->Next=new_alias;
433 } else {
434 new_alias->Next=NULL;
435 FirstAliasIpv6=new_alias;
436 }
437 return(1);
438}
439
#ifdef USE_PCRE
/*!
Store a regular expression to match the alias.

The first character of the buffer is the delimiter. The pattern runs up to
the next unescaped occurrence of that delimiter. The alias is the word
following the pattern after optional spaces or tabs. It is stored with
ALIAS_PREFIX in front of it.

\param buf The delimited regular expression followed by the alias. The
buffer is modified: the pattern and the alias are terminated in place.

\retval 1 Alias added.
\retval 0 Ignore the line.
\retval -1 Error.
*/
static int Alias_StoreRegexp(char *buf)
{
	char Delimiter;
	char *End;
	struct hostalias_regex *new_alias;
	struct hostalias_regex **prev_alias;
	const char *PcreError;
	int ErrorOffset;
	char *Replace;
	size_t len;
	char *tmp;
	size_t i;

	// find the pattern
	Delimiter=*buf;
	if (Delimiter=='\0') {
		// nothing at all after the prefix: don't walk past the end of the string
		debuga(_("Unterminated regular expression\n"));
		return(-1);
	}
	buf++;
	for (End=buf ; *End && *End!=Delimiter ; End++) {
		if (*End=='\\') {
			if (End[1]=='\0') {
				debuga(_("Invalid NUL character found in regular expression\n"));
				return(-1);
			}
			End++; //ignore the escaped character
		}
	}
	if (*End!=Delimiter) {
		debuga(_("Unterminated regular expression\n"));
		return(-1);
	}
	*End++='\0';

	// find the alias
	for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
	for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
	*End='\0';

	// store it
	new_alias=malloc(sizeof(*new_alias));
	if (!new_alias) {
		debuga(_("Not enough memory to store the host name aliasing directives\n"));
		return(-1);
	}
	new_alias->Next=NULL;
	new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
	if (new_alias->Re==NULL) {
		debuga(_("Failed to compile the regular expression \"%s\": %s\n"),buf,PcreError);
		free(new_alias);
		return(-1);
	}
	len=strlen(Replace);
	tmp=malloc(len+2);
	if (!tmp) {
		debuga(_("Not enough memory to store the host name aliasing directives\n"));
		pcre_free(new_alias->Re);
		free(new_alias);
		return(-1);
	}
	tmp[0]=ALIAS_PREFIX;
	memcpy(tmp+1,Replace,len);
	tmp[len+1]='\0';
	new_alias->Alias=tmp;

	new_alias->SubPartern=false;
	for (i=1 ; tmp[i] ; i++) {
		// both the sed \1 and the perl $1 replacement operators are accepted
		if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit((unsigned char)tmp[i+1])) {
			new_alias->SubPartern=true;
			break;
		}
	}

	// chain it at the end of the list
	for (prev_alias=&FirstAliasRe ; *prev_alias ; prev_alias=&(*prev_alias)->Next);
	*prev_alias=new_alias;

	return(1);
}
#endif
526
/*!
Store an alias in the corresponding list.

A line beginning with "re:" (in any case) is handed to the regular
expression parser. Any other line is parsed by extract_address_mask() and
stored according to the type it reports: 1 for a host name, 2 for an IPv4
address and 3 for an IPv6 address. Nothing is stored for any other
non negative type.

\param buf The string to parse and store.

\retval 0 No error.
\retval -1 Error in file.
*/
static int Alias_Store(char *buf)
{
	int type;
	const char *name;
	unsigned char ipv4[4];
	unsigned short int ipv6[8];
	int nbits;
	const char *next;
	int Error=0; // stays at "no error" if the type is none of those handled below

	if (strncasecmp(buf,"re:",3)==0) {
#ifdef USE_PCRE
		if (Alias_StoreRegexp(buf+3)<0)
			return(-1);
		return(0);
#else
		debuga(_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
		return(-1);
#endif
	}
	type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
	if (type<0) {
		return(-1);
	}

	switch (type) {
		case 1:
			Error=Alias_StoreName(name,next);
			break;
		case 2:
			Error=Alias_StoreIpv4(ipv4,nbits,next);
			break;
		case 3:
			Error=Alias_StoreIpv6(ipv6,nbits,next);
			break;
		default:
			break;
	}
	if (Error<0) return(-1);
	return(0);
}
570
571/*!
572Read the file containing the host names to alias in the report.
573
574\param Filename The name of the file.
575*/
576void read_hostalias(const char *Filename)
577{
578 FILE *fi;
579 longline line;
580 char *buf;
5207d9f8
FM
581
582 if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
583 fi=fopen(Filename,"rt");
584 if (!fi) {
585 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
586 exit(EXIT_FAILURE);
587 }
bd43d81f 588
5207d9f8
FM
589 if ((line=longline_create())==NULL) {
590 debuga(_("Not enough memory to read the host name aliases\n"));
591 exit(EXIT_FAILURE);
592 }
593
594 while ((buf=longline_read(fi,line)) != NULL) {
6e24f222 595 if (Alias_Store(buf)<0) {
c4633554 596 debuga(_("While reading \"%s\"\n"),Filename);
5207d9f8
FM
597 exit(EXIT_FAILURE);
598 }
22715352 599 }
bd43d81f 600
22715352
FM
601 longline_destroy(&line);
602 fclose(fi);
bd43d81f 603
22715352 604 if (debug) {
5207d9f8
FM
605 struct hostalias_name *alias1;
606 struct hostalias_ipv4 *alias4;
607 struct hostalias_ipv6 *alias6;
608
22715352 609 debuga(_("List of host names to alias:\n"));
5207d9f8
FM
610 for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
611 if (alias1->HostName_Prefix && alias1->HostName_Suffix)
612 debuga(_(" %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
613 else if (alias1->HostName_Prefix)
614 debuga(_(" %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
22715352 615 else
5207d9f8
FM
616 debuga(_(" *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
617 }
618 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
619 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
620 }
621 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
622 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
623 alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
22715352
FM
624 }
625 }
626}
627
628/*!
629Free the memory allocated by read_hostalias().
630*/
631void free_hostalias(void)
632{
6e24f222
FM
633 {
634 struct hostalias_name *alias1;
635 struct hostalias_name *next1;
bd43d81f 636
6e24f222
FM
637 for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
638 next1=alias1->Next;
639 if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
640 if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
641 free((void *)alias1->Alias);
642 free(alias1);
643 }
644 FirstAliasName=NULL;
645 }
646 {
647 struct hostalias_ipv4 *alias4;
648 struct hostalias_ipv4 *next4;
bd43d81f 649
6e24f222
FM
650 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
651 next4=alias4->Next;
652 free((void *)alias4->Alias);
653 free(alias4);
654 }
655 FirstAliasIpv4=NULL;
656 }
657 {
658 struct hostalias_ipv6 *alias6;
659 struct hostalias_ipv6 *next6;
bd43d81f 660
6e24f222
FM
661 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
662 next6=alias6->Next;
663 free((void *)alias6->Alias);
664 free(alias6);
665 }
666 FirstAliasIpv6=NULL;
667 }
668#ifdef USE_PCRE
669 {
670 struct hostalias_regex *alias;
671 struct hostalias_regex *next;
bd43d81f 672
6e24f222
FM
673 for (alias=FirstAliasRe ; alias ; alias=next) {
674 next=alias->Next;
675 pcre_free(alias->Re);
676 free((void *)alias->Alias);
677 free(alias);
678 }
679 FirstAliasRe=NULL;
680 }
681#endif
22715352
FM
682}
683
684/*!
685Replace the host name by its alias if it is in our list.
686
5207d9f8 687\param url The host name.
22715352
FM
688
689\return The pointer to the host name or its alias.
690*/
6e24f222 691static const char *alias_url_name(const char *url,const char *next)
22715352 692{
5207d9f8 693 struct hostalias_name *alias;
22715352
FM
694 int len;
695
7819e0d5 696 len=(int)(next-url);
5207d9f8 697 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
22715352
FM
698 if (len<alias->MinLen) continue;
699 if (alias->HostName_Prefix) {
700 if (alias->HostName_Suffix) {
701 if (strncasecmp(url,alias->HostName_Prefix,alias->PrefixLen)==0 &&
7819e0d5 702 strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
22715352
FM
703 return(alias->Alias);
704 }
705 } else {
7819e0d5 706 if (len==alias->PrefixLen && strncasecmp(url,alias->HostName_Prefix,len)==0) {
22715352
FM
707 return(alias->Alias);
708 }
709 }
7819e0d5 710 } else if (strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
22715352
FM
711 return(alias->Alias);
712 }
713 }
714 return(url);
715}
716
5207d9f8
FM
717/*!
718Replace the IPv4 address by its alias if it is in our list.
719
720\param url The host name.
721\param ipv4 The address.
722
723\return The pointer to the host name or its alias.
724*/
6e24f222 725static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
5207d9f8
FM
726{
727 struct hostalias_ipv4 *alias;
728 int len;
729
730 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
bd43d81f 731 len=alias->NBits;
5207d9f8
FM
732 if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
733 return(alias->Alias);
734 }
735 }
736 return(url);
737}
738
0ec4b481
FM
739/*!
740Replace the IPv6 address by its alias if it is in our list.
741
742\param url The host name.
743\param ipv6 The address.
744
745\return The pointer to the host name or its alias.
746*/
6e24f222 747static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
0ec4b481
FM
748{
749 struct hostalias_ipv6 *alias;
750 int len;
751 int i;
752
753 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
754 len=alias->NBits;
755 for (i=len/16-1 ; i>=0 && ipv6[i]==alias->Ip[i] ; i--);
756 if (i>=0) continue;
757 i=len/16;
758 if (i>=8 || len%16==0 || ((ipv6[i] ^ alias->Ip[i]) & (0xFFFF<<(len-i*16)))==0) {
759 return(alias->Alias);
760 }
761 }
762 return(url);
763}
764
6e24f222
FM
#ifdef USE_PCRE
/*!
Replace the host name by its alias if it is in our list.

The regular expressions are tried in the order of the list and the first
one matching is used. If its alias refers to subpatterns with \1 or $1,
the references are replaced by the corresponding text of the host name.
A reference to a subpattern that took no part in the match is replaced by
nothing. The result is truncated if it doesn't fit in the internal buffer.

\param url_ptr A pointer to the host name to match. It is replaced
by a pointer to the alias if a match is found.

\return \c True if a match is found or \c false if it failed.

\warning The function is not thread safe as it may return a static
internal buffer.
*/
static bool alias_url_regex(const char **url_ptr)
{
	struct hostalias_regex *alias;
	int nmatches;
	const char *url;
	int url_len;
	int ovector[30];//size must be a multiple of 3
	static char Replacement[1024];
	const char *str;
	int i;
	int sub;
	int sub_start;
	int sub_len;
	size_t repl_idx;

	url=*url_ptr;
	url_len=strlen(url);
	for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
		nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0]));
		if (nmatches<0) continue;

		// zero means the vector was too small: only the pairs stored in its first 2/3 are valid
		if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3;
		if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace
			*url_ptr=alias->Alias;
			return(true);
		}

		repl_idx=0;
		str=alias->Alias;
		for (i=0 ; str[i] ; i++) {
			// both the sed \1 and the perl $1 replacement operators are accepted
			if ((str[i]=='\\' || str[i]=='$') && isdigit((unsigned char)str[i+1])) {
				sub=str[++i]-'0';
				// pairs numbered nmatches and above were not set by pcre_exec
				if (sub>=1 && sub<nmatches) {
					/*
					 * ovector[2*sub] is the start position of the match.
					 * ovector[2*sub+1] is the end position of the match.
					 * Both are -1 if the subpattern took no part in the match.
					 */
					sub_start=ovector[2*sub];
					sub_len=ovector[2*sub+1]-sub_start;
					if (sub_start>=0 && sub_len>0) {
						if (repl_idx+(size_t)sub_len>=sizeof(Replacement)-1) break;
						memcpy(Replacement+repl_idx,url+sub_start,sub_len);
						repl_idx+=sub_len;
					}
					continue;
				}
			}
			if (repl_idx>=sizeof(Replacement)-1) break;
			Replacement[repl_idx++]=str[i];
		}
		Replacement[repl_idx]='\0';
		*url_ptr=Replacement;
		return(true);
	}
	return(false);
}
#endif
829
6fa33a32
FM
/*!
Find the beginning of the URL beyond the scheme://

If the URL begins with a scheme followed by ://, the returned pointer is
located after those characters and after any additional slash. Otherwise
the URL is returned unchanged.

\param url The url possibly containing a scheme.

\return The beginning of the url beyond the scheme.
*/
const char *skip_scheme(const char *url)
{
	const char *str;

	/*
	Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
	The underscore is not part of the standard but is found in the squid logs as cache_object://.
	The character is converted to unsigned char as required by isalnum() for bytes above 127.
	*/
	for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.' || *str=='_') ; str++);
	if (str[0]==':' && str[1]=='/' && str[2]=='/') {
		url=str+3;
		while (*url=='/') url++;
	}
	return(url);
}
852
22715352
FM
853/*!
854Get the part of the URL necessary to generate the report.
855
856\param url The URL as extracted from the report.
1a2609b0
FM
857\param full_url \c True to keep the whole URL. If \c false,
858the URL is truncated to only keep the host name and port number.
22715352 859*/
e2379f05 860const char *process_url(const char *url,bool full_url)
22715352 861{
e2379f05
FM
862 static char short_url[1024];
863 int i;
22715352 864 const char *start;
5207d9f8 865 int type;
5207d9f8
FM
866 unsigned char ipv4[4];
867 unsigned short int ipv6[8];
7819e0d5 868 const char *next;
22715352 869
6fa33a32 870 start=skip_scheme(url);
1a2609b0 871 if (!full_url) {
e2379f05
FM
872 for (i=0 ; i<sizeof(short_url)-1 && start[i] && start[i]!='/' && start[i]!='?' ; i++)
873 short_url[i]=start[i];
874 short_url[i]='\0';
875 start=short_url;
6e24f222
FM
876#ifdef USE_PCRE
877 if (FirstAliasRe) {
a16cb22a 878 if (alias_url_regex(&start)) return(start);
6e24f222
FM
879 }
880#endif
a16cb22a 881 type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next);
5207d9f8
FM
882 if (type==1) {
883 if (FirstAliasName)
7819e0d5 884 start=alias_url_name(start,next);
5207d9f8
FM
885 } else if (type==2) {
886 if (FirstAliasIpv4)
887 start=alias_url_ipv4(start,ipv4);
0ec4b481
FM
888 } else if (type==3) {
889 if (FirstAliasIpv6)
890 start=alias_url_ipv6(start,ipv6);
5207d9f8 891 }
22715352
FM
892 }
893 return(start);
894}
895
/*!
Extract the host name from the URL.

Nothing is written if the size of the buffer is zero or negative.

\param url The url whose host name must be extracted.
\param hostname The buffer to store the host name.
\param hostsize The size of the host name buffer.

\note The function is stupid at this time. It just searches for the first slash
in the URL and truncates the URL there. It doesn't take the protocol into account
nor the port number nor any user or password information.
*/
void url_hostname(const char *url,char *hostname,int hostsize)
{
	int i;

	// there is not even room for the terminating NUL
	if (hostsize<=0) return;

	hostsize--;
	for (i=0 ; i<hostsize && url[i] && url[i]!='/' ; i++)
		hostname[i]=url[i];
	hostname[i]='\0';
}
916