]> git.ipfire.org Git - thirdparty/sarg.git/blame - url.c
Rename configure.in as configure.ac
[thirdparty/sarg.git] / url.c
CommitLineData
22715352
FM
1/*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
110ce984 3 * 1998, 2015
22715352
FM
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27#include "include/conf.h"
28#include "include/defs.h"
6e24f222
FM
29#ifdef HAVE_PCRE_H
30#include <pcre.h>
31#define USE_PCRE
6e24f222 32#endif
22715352
FM
33
/*!
One host name mask and the name reported in its place.

The mask is stored as an optional constant prefix and an optional constant
suffix. Entries are chained in a singly linked list.
*/
struct hostalias_name
{
	//! Link to the following entry. It is NULL in the last entry of the list.
	struct hostalias_name *Next;
	//! Shortest host name that may match this mask.
	int MinLen;
	//! Number of characters in the constant text at the beginning of the mask.
	int PrefixLen;
	//! Number of characters in the constant text at the end of the mask.
	int SuffixLen;
	//! Constant text at the beginning of the mask or NULL if there is none.
	const char *HostName_Prefix;
	//! Constant text at the end of the mask or NULL if there is none.
	const char *HostName_Suffix;
	//! Name to report instead of the matching host name.
	const char *Alias;
};
54
5207d9f8
FM
/*!
One IPv4 range and the name reported in its place.
*/
struct hostalias_ipv4
{
	//! Link to the following entry. It is NULL in the last entry of the list.
	struct hostalias_ipv4 *Next;
	//! The four bytes of the address.
	unsigned char Ip[4];
	//! How many leading bits of the address form the prefix.
	int NBits;
	//! Name to report instead of a matching address.
	const char *Alias;
};
69
/*!
One IPv6 range and the name reported in its place.
*/
struct hostalias_ipv6
{
	//! Link to the following entry. It is NULL in the last entry of the list.
	struct hostalias_ipv6 *Next;
	//! The eight groups of the address.
	unsigned short Ip[8];
	//! How many leading bits of the address form the prefix.
	int NBits;
	//! Name to report instead of a matching address.
	const char *Alias;
};
84
6e24f222
FM
#ifdef USE_PCRE
/*!
One compiled regular expression and the name reported in place of the
host names it matches.
*/
struct hostalias_regex
{
	//! Link to the regular expression to try after this one or NULL.
	struct hostalias_regex *Next;
	//! Compiled regular expression applied to the host name.
	pcre *Re;
	//! Name to report instead of the matching host name.
	const char *Alias;
	//! \c True when the alias refers to at least one subpattern (\\1 or $1).
	bool SubPartern;
};
#endif
101
//! Head of the list of host name masks (empty until an alias file is read).
static struct hostalias_name *FirstAliasName;
//! Head of the list of IPv4 ranges.
static struct hostalias_ipv4 *FirstAliasIpv4;
//! Head of the list of IPv6 ranges.
static struct hostalias_ipv6 *FirstAliasIpv6;

#ifdef USE_PCRE
//! Head of the list of regular expressions.
static struct hostalias_regex *FirstAliasRe;
#endif
112
22715352 113/*!
5207d9f8 114 Store a name to alias.
22715352 115
5207d9f8
FM
116 \param name The name to match including the wildcard.
117 \param next A pointer to the first character after the name.
118
119 \retval 1 Alias added.
120 \retval 0 Ignore the line.
121 \retval -1 Error.
122 */
7819e0d5 123static int Alias_StoreName(const char *name,const char *next)
22715352 124{
7819e0d5
FM
125 const char *NameBegin;
126 const char *NameBeginE;
127 const char *NameEnd;
128 const char *NameEndE;
129 const char *Replace;
130 const char *ReplaceE;
131 const char *str;
5207d9f8
FM
132 char sep;
133 struct hostalias_name *alias;
134 struct hostalias_name *new_alias;
135 struct hostalias_name *prev_alias;
67a93701 136 char *tmp;
7819e0d5 137 int len;
22715352 138
5207d9f8 139 if (*name=='#' || *name==';') return(0);
22715352 140
5207d9f8
FM
141 // get host name and split at the wildcard
142 NameBegin=name;
7819e0d5
FM
143 for (str=NameBegin ; str<next && (unsigned char)*str>' ' && *str!='*' ; str++);
144 NameBeginE=str;
145 if (NameBegin==NameBeginE) NameBegin=NULL;
146 if (str<next && *str=='*') {
147 NameEnd=++str;
148 while (str<next && (unsigned char)*str>' ') {
5207d9f8 149 if (*str=='*') {
af961877 150 debuga(__FILE__,__LINE__,_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
5207d9f8 151 return(-1);
22715352 152 }
5207d9f8 153 str++;
22715352 154 }
7819e0d5
FM
155 NameEndE=str;
156 if (NameEnd==NameEndE) {
af961877 157 debuga(__FILE__,__LINE__,_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
7819e0d5
FM
158 return(-1);
159 }
160 } else {
5207d9f8 161 NameEnd=NULL;
5207d9f8 162 }
7819e0d5 163 while (str<next && (unsigned char)*str<=' ') str++;
5207d9f8 164 if (!NameBegin && !NameEnd) return(0);
22715352 165
5207d9f8 166 // get the alias
7819e0d5 167 sep=*next;
5207d9f8
FM
168 if (sep==' ' || sep=='\t') {
169 Replace=next;
22715352
FM
170 while (*Replace==' ' || *Replace=='\t') Replace++;
171 if ((unsigned char)*Replace<' ') {
172 Replace=NULL;
173 } else {
174 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 175 ReplaceE=str;
22715352 176 }
5207d9f8
FM
177 } else
178 Replace=NULL;
22715352 179
7819e0d5
FM
180 if (NameBegin) {
181 len=(int)(NameBeginE-NameBegin);
182 tmp=malloc(len+1);
183 if (!tmp) {
af961877 184 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
7819e0d5
FM
185 return(-1);
186 }
187 memcpy(tmp,NameBegin,len);
188 tmp[len]='\0';
189 NameBegin=tmp;
190 }
191 if (NameEnd) {
192 len=(int)(NameEndE-NameEnd);
193 tmp=malloc(len+1);
194 if (!tmp) {
195 if (NameBegin) free((void*)NameBegin);
af961877 196 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
7819e0d5
FM
197 return(-1);
198 }
199 memcpy(tmp,NameEnd,len);
200 tmp[len]='\0';
201 NameEnd=tmp;
202 }
bd43d81f 203
5207d9f8
FM
204 // ignore duplicates
205 prev_alias=NULL;
206 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
207 if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
208 ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
7819e0d5 209 if (NameBegin) free((void*)NameBegin);
5207d9f8 210 return(0);
22715352 211 }
5207d9f8
FM
212 prev_alias=alias;
213 }
22715352 214
5207d9f8
FM
215 // insert into the list
216 new_alias=malloc(sizeof(*new_alias));
217 if (!new_alias) {
7819e0d5
FM
218 if (NameBegin) free((void*)NameBegin);
219 if (NameEnd) free((void*)NameEnd);
af961877 220 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
221 return(-1);
222 }
223 new_alias->MinLen=0;
224 if (NameBegin) {
7819e0d5 225 new_alias->HostName_Prefix=NameBegin;
5207d9f8
FM
226 new_alias->MinLen+=strlen(NameBegin);
227 new_alias->PrefixLen=strlen(NameBegin);
228 } else {
229 new_alias->HostName_Prefix=NULL;
230 new_alias->PrefixLen=0;
231 }
232 if (NameEnd) {
7819e0d5 233 new_alias->HostName_Suffix=NameEnd;
5207d9f8
FM
234 new_alias->MinLen+=strlen(NameEnd)+1;
235 new_alias->SuffixLen=strlen(NameEnd);
236 } else {
237 new_alias->HostName_Suffix=NULL;
238 new_alias->SuffixLen=0;
239 }
240 if (Replace) {
7819e0d5
FM
241 len=(int)(ReplaceE-Replace);
242 tmp=malloc(len+2);
5207d9f8 243 if (!tmp) {
af961877 244 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8 245 return(-1);
22715352 246 }
5207d9f8 247 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
248 memcpy(tmp+1,Replace,len);
249 tmp[len+1]='\0';
5207d9f8
FM
250 new_alias->Alias=tmp;
251 } else {
252 tmp=malloc(new_alias->MinLen+2);
253 if (!tmp) {
af961877 254 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
255 return(-1);
256 }
257 tmp[0]=ALIAS_PREFIX;
258 if (new_alias->HostName_Prefix) strcpy(tmp+1,new_alias->HostName_Prefix);
259 if (new_alias->HostName_Suffix) {
260 tmp[new_alias->PrefixLen+1]='*';
261 strcpy(tmp+new_alias->PrefixLen+2,new_alias->HostName_Suffix);
262 }
263 new_alias->Alias=tmp;
264 }
bd43d81f 265
5207d9f8
FM
266 new_alias->Next=NULL;
267 if (prev_alias)
268 prev_alias->Next=new_alias;
269 else
270 FirstAliasName=new_alias;
271 return(1);
272}
273
274/*!
275 Store a IPv4 to alias.
276
277 \param ipv4 The IPv4 to match.
278 \param nbits The number of bits in the prefix
279 \param next A pointer to the first character after the address.
280
281 \retval 1 Alias added.
282 \retval 0 Ignore the line.
283 \retval -1 Error.
284 */
7819e0d5 285static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
5207d9f8 286{
7819e0d5
FM
287 const char *Replace;
288 const char *ReplaceE;
289 const char *str;
5207d9f8
FM
290 struct hostalias_ipv4 *alias;
291 struct hostalias_ipv4 *new_alias;
292 struct hostalias_ipv4 *prev_alias;
293 int i;
294 char *tmp;
7819e0d5 295 int len;
5207d9f8
FM
296
297 // get the alias
298 Replace=next;
299 while (*Replace==' ' || *Replace=='\t') Replace++;
300 if ((unsigned char)*Replace<' ') {
301 Replace=NULL;
302 } else {
303 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 304 ReplaceE=str;
5207d9f8
FM
305 }
306
307 // store more restrictive range first
308 prev_alias=NULL;
309 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
310 i=(nbits<alias->NBits) ? nbits : alias->NBits;
311 if ((i<8 || memcmp(ipv4,alias->Ip,i/8)==0) && ((i%8)==0 || (ipv4[i/8] ^ alias->Ip[i/8]) & (0xFFU<<(8-i%8)))==0) {
312 break;
313 }
314 prev_alias=alias;
315 }
316
317 // insert into the list
318 new_alias=malloc(sizeof(*new_alias));
319 if (!new_alias) {
af961877 320 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
321 return(-1);
322 }
323 memcpy(new_alias->Ip,ipv4,4);
324 new_alias->NBits=nbits;
325 if (Replace) {
7819e0d5
FM
326 len=(int)(ReplaceE-Replace);
327 tmp=malloc(len+2);
5207d9f8 328 if (!tmp) {
af961877 329 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
330 return(-1);
331 }
332 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
333 memcpy(tmp+1,Replace,len);
334 tmp[len+1]='\0';
5207d9f8
FM
335 new_alias->Alias=tmp;
336 } else {
337 tmp=malloc(5*4+1);
338 if (!tmp) {
af961877 339 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8 340 return(-1);
22715352 341 }
5207d9f8
FM
342 sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
343 new_alias->Alias=tmp;
344 }
bd43d81f 345
5207d9f8
FM
346 if (prev_alias) {
347 new_alias->Next=prev_alias->Next;
348 prev_alias->Next=new_alias;
349 } else {
22715352 350 new_alias->Next=NULL;
5207d9f8
FM
351 FirstAliasIpv4=new_alias;
352 }
353 return(1);
354}
355
356/*!
357 Store a IPv6 to alias.
358
359 \param ipv6 The IPv6 to match.
360 \param nbits The number of bits in the prefix
361 \param next A pointer to the first character after the address.
362
363 \retval 1 Alias added.
364 \retval 0 Ignore the line.
365 \retval -1 Error.
366 */
7819e0d5 367static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
5207d9f8 368{
7819e0d5
FM
369 const char *Replace;
370 const char *ReplaceE;
371 const char *str;
5207d9f8
FM
372 struct hostalias_ipv6 *alias;
373 struct hostalias_ipv6 *new_alias;
374 struct hostalias_ipv6 *prev_alias;
375 int i;
376 char *tmp;
7819e0d5 377 int len;
5207d9f8
FM
378
379 // get the alias
380 Replace=next;
381 while (*Replace==' ' || *Replace=='\t') Replace++;
382 if ((unsigned char)*Replace<' ') {
383 Replace=NULL;
384 } else {
385 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
7819e0d5 386 ReplaceE=str;
5207d9f8
FM
387 }
388
389 // store more restrictive range first
390 prev_alias=NULL;
391 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
392 i=(nbits<alias->NBits) ? nbits : alias->NBits;
393 if ((i<16 || memcmp(ipv6,alias->Ip,i/16*2)==0) && ((i%16)==0 || (ipv6[i/16] ^ alias->Ip[i/16]) & (0xFFFFU<<(16-i%16)))==0) {
394 break;
395 }
396 prev_alias=alias;
397 }
398
399 // insert into the list
400 new_alias=malloc(sizeof(*new_alias));
401 if (!new_alias) {
af961877 402 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
403 return(-1);
404 }
0ec4b481 405 memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
5207d9f8
FM
406 new_alias->NBits=nbits;
407 if (Replace) {
7819e0d5
FM
408 len=ReplaceE-Replace;
409 tmp=malloc(len+2);
5207d9f8 410 if (!tmp) {
af961877 411 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
412 return(-1);
413 }
414 tmp[0]=ALIAS_PREFIX;
7819e0d5
FM
415 memcpy(tmp+1,Replace,len);
416 tmp[len+1]='\0';
5207d9f8
FM
417 new_alias->Alias=tmp;
418 } else {
0ec4b481 419 tmp=malloc(5*8+5);
5207d9f8 420 if (!tmp) {
af961877 421 debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
5207d9f8
FM
422 return(-1);
423 }
424 sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
425 new_alias->Alias=tmp;
426 }
bd43d81f 427
5207d9f8
FM
428 if (prev_alias) {
429 new_alias->Next=prev_alias->Next;
430 prev_alias->Next=new_alias;
431 } else {
432 new_alias->Next=NULL;
433 FirstAliasIpv6=new_alias;
434 }
435 return(1);
436}
437
#ifdef USE_PCRE
/*!
Store a regular expression to match the alias.

The first character of \a buf is the delimiter of the pattern. The pattern
runs up to the next unescaped occurrence of that delimiter. The alias is the
first word found after the closing delimiter. The buffer is modified in place
to terminate the pattern and the alias.

\param buf The text following the "re:" marker.

\retval 1 Alias added.
\retval -1 Error.
*/
static int Alias_StoreRegexp(char *buf)
{
	char Delimiter;
	char *End;
	struct hostalias_regex *alias;
	struct hostalias_regex *new_alias;
	struct hostalias_regex **prev_alias;
	const char *PcreError;
	int ErrorOffset;
	char *Replace;
	int len;
	char *tmp;
	int i;

	// find the pattern
	Delimiter=*buf;
	if (Delimiter=='\0') {
		// nothing follows the marker: don't scan beyond the end of the string
		debuga(__FILE__,__LINE__,_("Unterminated regular expression\n"));
		return(-1);
	}
	buf++;
	for (End=buf ; *End && *End!=Delimiter ; End++) {
		if (*End=='\\') {
			if (End[1]=='\0') {
				debuga(__FILE__,__LINE__,_("Invalid NUL character found in regular expression\n"));
				return(-1);
			}
			End++; //ignore the escaped character
		}
	}
	if (*End!=Delimiter) {
		debuga(__FILE__,__LINE__,_("Unterminated regular expression\n"));
		return(-1);
	}
	*End++='\0';

	// find the alias
	for (Replace=End ; *Replace==' ' || *Replace=='\t' ; Replace++);
	for (End=Replace ; *End && (unsigned char)*End>' ' ; End++);
	*End='\0';

	// store it
	new_alias=malloc(sizeof(*new_alias));
	if (!new_alias) {
		debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
		return(-1);
	}
	new_alias->Next=NULL;
	new_alias->Re=pcre_compile(buf,0,&PcreError,&ErrorOffset,NULL);
	if (new_alias->Re==NULL) {
		debuga(__FILE__,__LINE__,_("Failed to compile the regular expression \"%s\": %s\n"),buf,PcreError);
		free(new_alias);
		return(-1);
	}
	len=strlen(Replace);
	tmp=malloc(len+2);
	if (!tmp) {
		debuga(__FILE__,__LINE__,_("Not enough memory to store the host name aliasing directives\n"));
		pcre_free(new_alias->Re);
		free(new_alias);
		return(-1);
	}
	tmp[0]=ALIAS_PREFIX;
	memcpy(tmp+1,Replace,len);
	tmp[len+1]='\0';
	new_alias->Alias=tmp;

	new_alias->SubPartern=false;
	for (i=1 ; tmp[i] ; i++)
		// both the sed \1 and the perl $1 replacement operators are accepted
		if ((tmp[i]=='\\' || tmp[i]=='$') && isdigit((unsigned char)tmp[i+1])) {
			new_alias->SubPartern=true;
			break;
		}

	// chain it
	prev_alias=&FirstAliasRe;
	for (alias=FirstAliasRe ; alias ; alias=alias->Next)
		prev_alias=&alias->Next;
	*prev_alias=new_alias;

	return(1);
}
#endif
524
/*!
Store an alias in the corresponding list.

A string starting with "re:" is handed to the regular expression parser.
Any other string is classified by extract_address_mask() and stored as a
host name, an IPv4 range or an IPv6 range.

\param buf The string to parse and store.

\retval 0 No error.
\retval -1 Error in file.
\retval -2 Unknown string type to store.
*/
static int Alias_Store(char *buf)
{
	int type;
	const char *name;
	unsigned char ipv4[4];
	unsigned short int ipv6[8];
	int nbits;
	const char *next;
	int Error=-2;

	if (strncasecmp(buf,"re:",3)==0) {
#ifdef USE_PCRE
		if (Alias_StoreRegexp(buf+3)<0)
			return(-1);
		return(0);
#else
		debuga(__FILE__,__LINE__,_("PCRE not compiled in therefore the regular expressions are not available in the host alias file\n"));
		return(-1);
#endif
	}
	type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
	if (type<0) {
		return(-1);
	}

	if (type==1) {
		Error=Alias_StoreName(name,next);
	} else if (type==2) {
		Error=Alias_StoreIpv4(ipv4,nbits,next);
	} else if (type==3) {
		Error=Alias_StoreIpv6(ipv6,nbits,next);
	}
	// Error is still -2 if the type is none of the above. The negative code is
	// returned unchanged so that the caller can tell both failures apart.
	if (Error<0) return(Error);
	return(0);
}
569
570/*!
571Read the file containing the host names to alias in the report.
572
573\param Filename The name of the file.
574*/
575void read_hostalias(const char *Filename)
576{
577 FILE *fi;
578 longline line;
579 char *buf;
5207d9f8 580
af961877 581 if (debug) debuga(__FILE__,__LINE__,_("Reading host alias file \"%s\"\n"),Filename);
5207d9f8
FM
582 fi=fopen(Filename,"rt");
583 if (!fi) {
af961877 584 debuga(__FILE__,__LINE__,_("Cannot read host name alias file \"%s\": %s\n"),Filename,strerror(errno));
5207d9f8
FM
585 exit(EXIT_FAILURE);
586 }
bd43d81f 587
5207d9f8 588 if ((line=longline_create())==NULL) {
af961877 589 debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),Filename);
5207d9f8
FM
590 exit(EXIT_FAILURE);
591 }
592
593 while ((buf=longline_read(fi,line)) != NULL) {
6e24f222 594 if (Alias_Store(buf)<0) {
af961877 595 debuga(__FILE__,__LINE__,_("While reading \"%s\"\n"),Filename);
5207d9f8
FM
596 exit(EXIT_FAILURE);
597 }
22715352 598 }
bd43d81f 599
22715352 600 longline_destroy(&line);
204781f4 601 if (fclose(fi)==EOF) {
af961877 602 debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),Filename,strerror(errno));
204781f4
FM
603 exit(EXIT_FAILURE);
604 }
bd43d81f 605
22715352 606 if (debug) {
5207d9f8
FM
607 struct hostalias_name *alias1;
608 struct hostalias_ipv4 *alias4;
609 struct hostalias_ipv6 *alias6;
610
af961877 611 debuga(__FILE__,__LINE__,_("List of host names to alias:\n"));
5207d9f8
FM
612 for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
613 if (alias1->HostName_Prefix && alias1->HostName_Suffix)
af961877 614 debuga(__FILE__,__LINE__,_(" %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
5207d9f8 615 else if (alias1->HostName_Prefix)
af961877 616 debuga(__FILE__,__LINE__,_(" %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
22715352 617 else
af961877 618 debuga(__FILE__,__LINE__,_(" *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
5207d9f8
FM
619 }
620 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
af961877 621 debuga(__FILE__,__LINE__,_(" %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
5207d9f8
FM
622 }
623 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
af961877 624 debuga(__FILE__,__LINE__,_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
5207d9f8 625 alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
22715352
FM
626 }
627 }
628}
629
630/*!
631Free the memory allocated by read_hostalias().
632*/
633void free_hostalias(void)
634{
6e24f222
FM
635 {
636 struct hostalias_name *alias1;
637 struct hostalias_name *next1;
bd43d81f 638
6e24f222
FM
639 for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
640 next1=alias1->Next;
641 if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
642 if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
643 free((void *)alias1->Alias);
644 free(alias1);
645 }
646 FirstAliasName=NULL;
647 }
648 {
649 struct hostalias_ipv4 *alias4;
650 struct hostalias_ipv4 *next4;
bd43d81f 651
6e24f222
FM
652 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
653 next4=alias4->Next;
654 free((void *)alias4->Alias);
655 free(alias4);
656 }
657 FirstAliasIpv4=NULL;
658 }
659 {
660 struct hostalias_ipv6 *alias6;
661 struct hostalias_ipv6 *next6;
bd43d81f 662
6e24f222
FM
663 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
664 next6=alias6->Next;
665 free((void *)alias6->Alias);
666 free(alias6);
667 }
668 FirstAliasIpv6=NULL;
669 }
670#ifdef USE_PCRE
671 {
672 struct hostalias_regex *alias;
673 struct hostalias_regex *next;
bd43d81f 674
6e24f222
FM
675 for (alias=FirstAliasRe ; alias ; alias=next) {
676 next=alias->Next;
677 pcre_free(alias->Re);
678 free((void *)alias->Alias);
679 free(alias);
680 }
681 FirstAliasRe=NULL;
682 }
683#endif
22715352
FM
684}
685
686/*!
687Replace the host name by its alias if it is in our list.
688
5207d9f8 689\param url The host name.
22715352
FM
690
691\return The pointer to the host name or its alias.
692*/
6e24f222 693static const char *alias_url_name(const char *url,const char *next)
22715352 694{
5207d9f8 695 struct hostalias_name *alias;
22715352
FM
696 int len;
697
7819e0d5 698 len=(int)(next-url);
5207d9f8 699 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
22715352
FM
700 if (len<alias->MinLen) continue;
701 if (alias->HostName_Prefix) {
702 if (alias->HostName_Suffix) {
703 if (strncasecmp(url,alias->HostName_Prefix,alias->PrefixLen)==0 &&
7819e0d5 704 strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
22715352
FM
705 return(alias->Alias);
706 }
707 } else {
7819e0d5 708 if (len==alias->PrefixLen && strncasecmp(url,alias->HostName_Prefix,len)==0) {
22715352
FM
709 return(alias->Alias);
710 }
711 }
7819e0d5 712 } else if (strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
22715352
FM
713 return(alias->Alias);
714 }
715 }
716 return(url);
717}
718
5207d9f8
FM
719/*!
720Replace the IPv4 address by its alias if it is in our list.
721
722\param url The host name.
723\param ipv4 The address.
724
725\return The pointer to the host name or its alias.
726*/
6e24f222 727static const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
5207d9f8
FM
728{
729 struct hostalias_ipv4 *alias;
730 int len;
731
732 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
bd43d81f 733 len=alias->NBits;
5207d9f8
FM
734 if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
735 return(alias->Alias);
736 }
737 }
738 return(url);
739}
740
0ec4b481
FM
741/*!
742Replace the IPv6 address by its alias if it is in our list.
743
744\param url The host name.
745\param ipv6 The address.
746
747\return The pointer to the host name or its alias.
748*/
6e24f222 749static const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
0ec4b481
FM
750{
751 struct hostalias_ipv6 *alias;
752 int len;
753 int i;
754
755 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
756 len=alias->NBits;
757 for (i=len/16-1 ; i>=0 && ipv6[i]==alias->Ip[i] ; i--);
758 if (i>=0) continue;
759 i=len/16;
760 if (i>=8 || len%16==0 || ((ipv6[i] ^ alias->Ip[i]) & (0xFFFF<<(len-i*16)))==0) {
761 return(alias->Alias);
762 }
763 }
764 return(url);
765}
766
6e24f222
FM
#ifdef USE_PCRE
/*!
Replace the host name by its alias if it is in our list.

The regular expressions are tried in the order of the list and the first
one matching the host name provides the alias. If the alias contains
references to subpatterns (\\1 or $1), they are replaced by the text
captured in the host name.

\param url_ptr A pointer to the host name to match. It is replaced
by a pointer to the alias if a match is found.

\return \c True if a match is found or \c false if it failed.

\warning The function is not thread safe as it may return a static
internal buffer.
*/
static bool alias_url_regex(const char **url_ptr)
{
	struct hostalias_regex *alias;
	int nmatches;
	const char *url;
	int url_len;
	int ovector[30];//size must be a multiple of 3
	static char Replacement[1024];
	const int repl_max=(int)sizeof(Replacement)-1;
	const char *str;
	int i;
	int sub;
	int sub_len;
	int repl_idx;

	url=*url_ptr;
	url_len=(int)strlen(url);
	for (alias=FirstAliasRe ; alias ; alias=alias->Next) {
		nmatches=pcre_exec(alias->Re,NULL,url,url_len,0,0,ovector,sizeof(ovector)/sizeof(ovector[0]));
		if (nmatches<0) continue;
		// zero means that ovector was too small: pcre_exec only uses 2/3 of the
		// vector for the results hence 1/3 of its size is the number of pairs
		if (nmatches==0) nmatches=(int)(sizeof(ovector)/sizeof(ovector[0]))/3;
		if (nmatches==1 || !alias->SubPartern) { //no subpattern to replace
			*url_ptr=alias->Alias;
			return(true);
		}
		repl_idx=0;
		str=alias->Alias;
		for (i=0 ; str[i] ; i++) {
			// both the sed \1 and the perl $1 replacement operators are accepted
			if ((str[i]=='\\' || str[i]=='$') && isdigit((unsigned char)str[i+1])) {
				sub=str[++i]-'0';
				// pairs 0 to nmatches-1 are the only ones set by pcre_exec
				if (sub>=1 && sub<nmatches) {
					/*
					 * ovector[2*sub] is the start position of the match.
					 * ovector[2*sub+1] is the end position of the match.
					 * Both are -1 if the subpattern took no part in the match.
					 */
					sub<<=1;
					if (ovector[sub]<0) continue;
					sub_len=ovector[sub+1]-ovector[sub];
					if (repl_idx+sub_len>=repl_max) break;
					memcpy(Replacement+repl_idx,url+ovector[sub],sub_len);
					repl_idx+=sub_len;
					continue;
				}
			}
			if (repl_idx>=repl_max) break;
			Replacement[repl_idx++]=str[i];
		}
		Replacement[repl_idx]='\0';
		*url_ptr=Replacement;
		return(true);
	}
	return(false);
}
#endif
831
6fa33a32
FM
/*!
Find the beginning of the URL beyond the scheme://

Any slash immediately following the scheme:// is skipped too. The URL is
returned unchanged if it doesn't start with a scheme followed by "://".

\param url The url possibly containing a scheme.

\return The beginning of the url beyond the scheme.
*/
const char *skip_scheme(const char *url)
{
	const char *str;

	/*
	Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
	The underscore is not part of the standard but is found in the squid logs as cache_object://.
	The character is converted to unsigned char as isalnum() is not defined for negative values.
	*/
	for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.' || *str=='_') ; str++);
	if (str[0]==':' && str[1]=='/' && str[2]=='/') {
		url=str+3;
		while (*url=='/') url++;
	}
	return(url);
}
854
22715352
FM
855/*!
856Get the part of the URL necessary to generate the report.
857
858\param url The URL as extracted from the report.
1a2609b0
FM
859\param full_url \c True to keep the whole URL. If \c false,
860the URL is truncated to only keep the host name and port number.
22715352 861*/
e2379f05 862const char *process_url(const char *url,bool full_url)
22715352 863{
e2379f05
FM
864 static char short_url[1024];
865 int i;
22715352 866 const char *start;
5207d9f8 867 int type;
5207d9f8
FM
868 unsigned char ipv4[4];
869 unsigned short int ipv6[8];
7819e0d5 870 const char *next;
22715352 871
6fa33a32 872 start=skip_scheme(url);
1a2609b0 873 if (!full_url) {
e2379f05
FM
874 for (i=0 ; i<sizeof(short_url)-1 && start[i] && start[i]!='/' && start[i]!='?' ; i++)
875 short_url[i]=start[i];
876 short_url[i]='\0';
877 start=short_url;
6e24f222
FM
878#ifdef USE_PCRE
879 if (FirstAliasRe) {
a16cb22a 880 if (alias_url_regex(&start)) return(start);
6e24f222
FM
881 }
882#endif
a16cb22a 883 type=extract_address_mask(start,NULL,ipv4,ipv6,NULL,&next);
5207d9f8
FM
884 if (type==1) {
885 if (FirstAliasName)
7819e0d5 886 start=alias_url_name(start,next);
5207d9f8
FM
887 } else if (type==2) {
888 if (FirstAliasIpv4)
889 start=alias_url_ipv4(start,ipv4);
0ec4b481
FM
890 } else if (type==3) {
891 if (FirstAliasIpv6)
892 start=alias_url_ipv6(start,ipv6);
5207d9f8 893 }
22715352
FM
894 }
895 return(start);
896}
897
/*!
Extract the host name from the URL.

The characters of the URL are copied up to the first slash, the end of the
URL or the capacity of the buffer, whichever comes first. The result is
always terminated. Nothing is written if the buffer is missing or has no
room for the terminating character.

\param url The url whose host name must be extracted.
\param hostname The buffer to store the host name.
\param hostsize The size of the host name buffer.

\note The function is stupid at this time. It just searches for the first slash
in the URL and truncates the URL there. It doesn't take the protocol into account
nor the port number nor any user or password information.
*/
void url_hostname(const char *url,char *hostname,int hostsize)
{
	int i;

	// without room for the terminating zero, the buffer must be left alone
	if (!hostname || hostsize<=0) return;

	hostsize--;
	for (i=0 ; i<hostsize && url[i] && url[i]!='/' ; i++)
		hostname[i]=url[i];
	hostname[i]='\0';
}
918