]> git.ipfire.org Git - thirdparty/sarg.git/blob - url.c
da66df59245004e09877f6d7b98f78cbcee9c0e5
[thirdparty/sarg.git] / url.c
1 /*
2 * SARG Squid Analysis Report Generator http://sarg.sourceforge.net
3 * 1998, 2011
4 *
5 * SARG donations:
6 * please look at http://sarg.sourceforge.net/donations.php
7 * Support:
8 * http://sourceforge.net/projects/sarg/forums/forum/363374
9 * ---------------------------------------------------------------------
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
24 *
25 */
26
27 #include "include/conf.h"
28 #include "include/defs.h"
29
30 /*!
31 A host name mask (a constant prefix, an optional wildcard and a constant suffix) and the name to report in its place.
32 */
33 struct hostalias_name
34 {
35 //! The next host name mask in the list or NULL for the last item.
36 struct hostalias_name *Next;
37 //! The minimum length of a candidate host name: the prefix length plus, when there is a suffix, the suffix length and one more character.
38 int MinLen;
39 //! The length of the constant part at the beginning of the mask (0 if there is none).
40 int PrefixLen;
41 //! The length of the constant part at the end of the mask (0 if there is none).
42 int SuffixLen;
43 //! The constant text before the wildcard or NULL if the mask begins with the wildcard.
44 const char *HostName_Prefix;
45 //! The constant text after the wildcard or NULL if the mask contains no wildcard.
46 const char *HostName_Suffix;
47 //! The replacement name. It is stored with ALIAS_PREFIX as its first character.
48 const char *Alias;
49 };
50
51 /*!
52 An IPv4 address with its prefix length and the name to report in its place.
53 */
54 struct hostalias_ipv4
55 {
56 //! The next IPv4 entry in the list or NULL for the last item.
57 struct hostalias_ipv4 *Next;
58 //! The four bytes of the IP address.
59 unsigned char Ip[4];
60 //! The number of leading bits of the address that must match.
61 int NBits;
62 //! The replacement name. It is stored with ALIAS_PREFIX as its first character.
63 const char *Alias;
64 };
65
66 /*!
67 An IPv6 address with its prefix length and the name to report in its place.
68 */
69 struct hostalias_ipv6
70 {
71 //! The next IPv6 entry in the list or NULL for the last item.
72 struct hostalias_ipv6 *Next;
73 //! The eight groups of the IP address.
74 unsigned short Ip[8];
75 //! The number of leading bits of the address that must match.
76 int NBits;
77 //! The replacement name. It is stored with ALIAS_PREFIX as its first character.
78 const char *Alias;
79 };
80
81 //! The first host name alias or NULL if the list is empty.
82 static struct hostalias_name *FirstAliasName=NULL;
83 //! The first IPv4 address alias or NULL if the list is empty.
84 static struct hostalias_ipv4 *FirstAliasIpv4=NULL;
85 //! The first IPv6 address alias or NULL if the list is empty.
86 static struct hostalias_ipv6 *FirstAliasIpv6=NULL;
87
88 /*!
89 Store a name to alias.
90
91 \param name The name to match including the wildcard.
92 \param next A pointer to the first character after the name.
93
94 \retval 1 Alias added.
95 \retval 0 Ignore the line.
96 \retval -1 Error.
97 */
98 static int Alias_StoreName(const char *name,const char *next)
99 {
100 const char *NameBegin;
101 const char *NameBeginE;
102 const char *NameEnd;
103 const char *NameEndE;
104 const char *Replace;
105 const char *ReplaceE;
106 const char *str;
107 char sep;
108 struct hostalias_name *alias;
109 struct hostalias_name *new_alias;
110 struct hostalias_name *prev_alias;
111 char *tmp;
112 int len;
113
114 if (*name=='#' || *name==';') return(0);
115
116 // get host name and split at the wildcard
117 NameBegin=name;
118 for (str=NameBegin ; str<next && (unsigned char)*str>' ' && *str!='*' ; str++);
119 NameBeginE=str;
120 if (NameBegin==NameBeginE) NameBegin=NULL;
121 if (str<next && *str=='*') {
122 NameEnd=++str;
123 while (str<next && (unsigned char)*str>' ') {
124 if (*str=='*') {
125 debuga(_("Host name alias \"%s*%s\" contains too many wildcards (*)\n"),NameBegin,NameEnd);
126 return(-1);
127 }
128 str++;
129 }
130 NameEndE=str;
131 if (NameEnd==NameEndE) {
132 debuga(_("Host name alias \"%*s\" must not end with a wildcard\n"),(int)(next-name),name);
133 return(-1);
134 }
135 } else {
136 NameEnd=NULL;
137 }
138 while (str<next && (unsigned char)*str<=' ') str++;
139 if (!NameBegin && !NameEnd) return(0);
140
141 // get the alias
142 sep=*next;
143 if (sep==' ' || sep=='\t') {
144 Replace=next;
145 while (*Replace==' ' || *Replace=='\t') Replace++;
146 if ((unsigned char)*Replace<' ') {
147 Replace=NULL;
148 } else {
149 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
150 ReplaceE=str;
151 }
152 } else
153 Replace=NULL;
154
155 if (NameBegin) {
156 len=(int)(NameBeginE-NameBegin);
157 tmp=malloc(len+1);
158 if (!tmp) {
159 debuga(_("Not enough memory to store the host name aliasing directives\n"));
160 return(-1);
161 }
162 memcpy(tmp,NameBegin,len);
163 tmp[len]='\0';
164 NameBegin=tmp;
165 }
166 if (NameEnd) {
167 len=(int)(NameEndE-NameEnd);
168 tmp=malloc(len+1);
169 if (!tmp) {
170 if (NameBegin) free((void*)NameBegin);
171 debuga(_("Not enough memory to store the host name aliasing directives\n"));
172 return(-1);
173 }
174 memcpy(tmp,NameEnd,len);
175 tmp[len]='\0';
176 NameEnd=tmp;
177 }
178
179 // ignore duplicates
180 prev_alias=NULL;
181 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
182 if (((NameBegin && alias->HostName_Prefix && !strcmp(NameBegin,alias->HostName_Prefix)) || (!NameBegin && !alias->HostName_Prefix)) &&
183 ((NameEnd && alias->HostName_Suffix && !strcmp(NameEnd,alias->HostName_Suffix)) || (!NameEnd && !alias->HostName_Suffix))) {
184 if (NameBegin) free((void*)NameBegin);
185 return(0);
186 }
187 prev_alias=alias;
188 }
189
190 // insert into the list
191 new_alias=malloc(sizeof(*new_alias));
192 if (!new_alias) {
193 if (NameBegin) free((void*)NameBegin);
194 if (NameEnd) free((void*)NameEnd);
195 debuga(_("Not enough memory to store the host name aliasing directives\n"));
196 return(-1);
197 }
198 new_alias->MinLen=0;
199 if (NameBegin) {
200 new_alias->HostName_Prefix=NameBegin;
201 new_alias->MinLen+=strlen(NameBegin);
202 new_alias->PrefixLen=strlen(NameBegin);
203 } else {
204 new_alias->HostName_Prefix=NULL;
205 new_alias->PrefixLen=0;
206 }
207 if (NameEnd) {
208 new_alias->HostName_Suffix=NameEnd;
209 new_alias->MinLen+=strlen(NameEnd)+1;
210 new_alias->SuffixLen=strlen(NameEnd);
211 } else {
212 new_alias->HostName_Suffix=NULL;
213 new_alias->SuffixLen=0;
214 }
215 if (Replace) {
216 len=(int)(ReplaceE-Replace);
217 tmp=malloc(len+2);
218 if (!tmp) {
219 debuga(_("Not enough memory to store the host name aliasing directives\n"));
220 return(-1);
221 }
222 tmp[0]=ALIAS_PREFIX;
223 memcpy(tmp+1,Replace,len);
224 tmp[len+1]='\0';
225 new_alias->Alias=tmp;
226 } else {
227 tmp=malloc(new_alias->MinLen+2);
228 if (!tmp) {
229 debuga(_("Not enough memory to store the host name aliasing directives\n"));
230 return(-1);
231 }
232 tmp[0]=ALIAS_PREFIX;
233 if (new_alias->HostName_Prefix) strcpy(tmp+1,new_alias->HostName_Prefix);
234 if (new_alias->HostName_Suffix) {
235 tmp[new_alias->PrefixLen+1]='*';
236 strcpy(tmp+new_alias->PrefixLen+2,new_alias->HostName_Suffix);
237 }
238 new_alias->Alias=tmp;
239 }
240
241 new_alias->Next=NULL;
242 if (prev_alias)
243 prev_alias->Next=new_alias;
244 else
245 FirstAliasName=new_alias;
246 return(1);
247 }
248
249 /*!
250 Store a IPv4 to alias.
251
252 \param ipv4 The IPv4 to match.
253 \param nbits The number of bits in the prefix
254 \param next A pointer to the first character after the address.
255
256 \retval 1 Alias added.
257 \retval 0 Ignore the line.
258 \retval -1 Error.
259 */
260 static int Alias_StoreIpv4(unsigned char *ipv4,int nbits,const char *next)
261 {
262 const char *Replace;
263 const char *ReplaceE;
264 const char *str;
265 struct hostalias_ipv4 *alias;
266 struct hostalias_ipv4 *new_alias;
267 struct hostalias_ipv4 *prev_alias;
268 int i;
269 char *tmp;
270 int len;
271
272 // get the alias
273 Replace=next;
274 while (*Replace==' ' || *Replace=='\t') Replace++;
275 if ((unsigned char)*Replace<' ') {
276 Replace=NULL;
277 } else {
278 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
279 ReplaceE=str;
280 }
281
282 // store more restrictive range first
283 prev_alias=NULL;
284 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
285 i=(nbits<alias->NBits) ? nbits : alias->NBits;
286 if ((i<8 || memcmp(ipv4,alias->Ip,i/8)==0) && ((i%8)==0 || (ipv4[i/8] ^ alias->Ip[i/8]) & (0xFFU<<(8-i%8)))==0) {
287 break;
288 }
289 prev_alias=alias;
290 }
291
292 // insert into the list
293 new_alias=malloc(sizeof(*new_alias));
294 if (!new_alias) {
295 debuga(_("Not enough memory to store the host name aliasing directives\n"));
296 return(-1);
297 }
298 memcpy(new_alias->Ip,ipv4,4);
299 new_alias->NBits=nbits;
300 if (Replace) {
301 len=(int)(ReplaceE-Replace);
302 tmp=malloc(len+2);
303 if (!tmp) {
304 debuga(_("Not enough memory to store the host name aliasing directives\n"));
305 return(-1);
306 }
307 tmp[0]=ALIAS_PREFIX;
308 memcpy(tmp+1,Replace,len);
309 tmp[len+1]='\0';
310 new_alias->Alias=tmp;
311 } else {
312 tmp=malloc(5*4+1);
313 if (!tmp) {
314 debuga(_("Not enough memory to store the host name aliasing directives\n"));
315 return(-1);
316 }
317 sprintf(tmp,"%c%d.%d.%d.%d/%d",ALIAS_PREFIX,ipv4[0],ipv4[1],ipv4[2],ipv4[3],nbits);
318 new_alias->Alias=tmp;
319 }
320
321 if (prev_alias) {
322 new_alias->Next=prev_alias->Next;
323 prev_alias->Next=new_alias;
324 } else {
325 new_alias->Next=NULL;
326 FirstAliasIpv4=new_alias;
327 }
328 return(1);
329 }
330
331 /*!
332 Store a IPv6 to alias.
333
334 \param ipv6 The IPv6 to match.
335 \param nbits The number of bits in the prefix
336 \param next A pointer to the first character after the address.
337
338 \retval 1 Alias added.
339 \retval 0 Ignore the line.
340 \retval -1 Error.
341 */
342 static int Alias_StoreIpv6(unsigned short *ipv6,int nbits,const char *next)
343 {
344 const char *Replace;
345 const char *ReplaceE;
346 const char *str;
347 struct hostalias_ipv6 *alias;
348 struct hostalias_ipv6 *new_alias;
349 struct hostalias_ipv6 *prev_alias;
350 int i;
351 char *tmp;
352 int len;
353
354 // get the alias
355 Replace=next;
356 while (*Replace==' ' || *Replace=='\t') Replace++;
357 if ((unsigned char)*Replace<' ') {
358 Replace=NULL;
359 } else {
360 for (str=Replace ; *str && (unsigned char)*str>=' ' ; str++);
361 ReplaceE=str;
362 }
363
364 // store more restrictive range first
365 prev_alias=NULL;
366 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
367 i=(nbits<alias->NBits) ? nbits : alias->NBits;
368 if ((i<16 || memcmp(ipv6,alias->Ip,i/16*2)==0) && ((i%16)==0 || (ipv6[i/16] ^ alias->Ip[i/16]) & (0xFFFFU<<(16-i%16)))==0) {
369 break;
370 }
371 prev_alias=alias;
372 }
373
374 // insert into the list
375 new_alias=malloc(sizeof(*new_alias));
376 if (!new_alias) {
377 debuga(_("Not enough memory to store the host name aliasing directives\n"));
378 return(-1);
379 }
380 memcpy(new_alias->Ip,ipv6,8*sizeof(unsigned short int));
381 new_alias->NBits=nbits;
382 if (Replace) {
383 len=ReplaceE-Replace;
384 tmp=malloc(len+2);
385 if (!tmp) {
386 debuga(_("Not enough memory to store the host name aliasing directives\n"));
387 return(-1);
388 }
389 tmp[0]=ALIAS_PREFIX;
390 memcpy(tmp+1,Replace,len);
391 tmp[len+1]='\0';
392 new_alias->Alias=tmp;
393 } else {
394 tmp=malloc(5*8+5);
395 if (!tmp) {
396 debuga(_("Not enough memory to store the host name aliasing directives\n"));
397 return(-1);
398 }
399 sprintf(tmp,"%c%x:%x:%x:%x:%x:%x:%x:%x/%d",ALIAS_PREFIX,ipv6[0],ipv6[1],ipv6[2],ipv6[3],ipv6[4],ipv6[5],ipv6[6],ipv6[7],nbits);
400 new_alias->Alias=tmp;
401 }
402
403 if (prev_alias) {
404 new_alias->Next=prev_alias->Next;
405 prev_alias->Next=new_alias;
406 } else {
407 new_alias->Next=NULL;
408 FirstAliasIpv6=new_alias;
409 }
410 return(1);
411 }
412
413 /*!
414 Read the file containing the host names to alias in the report.
415
416 \param Filename The name of the file.
417 */
418 void read_hostalias(const char *Filename)
419 {
420 FILE *fi;
421 longline line;
422 char *buf;
423 int type;
424 const char *name;
425 unsigned char ipv4[4];
426 unsigned short int ipv6[8];
427 int nbits;
428 const char *next;
429
430 if (debug) debuga(_("Reading host alias file \"%s\"\n"),Filename);
431 fi=fopen(Filename,"rt");
432 if (!fi) {
433 debuga(_("Cannot read host name alias file \"%s\" - %s\n"),Filename,strerror(errno));
434 exit(EXIT_FAILURE);
435 }
436
437 if ((line=longline_create())==NULL) {
438 debuga(_("Not enough memory to read the host name aliases\n"));
439 exit(EXIT_FAILURE);
440 }
441
442 while ((buf=longline_read(fi,line)) != NULL) {
443 type=extract_address_mask(buf,&name,ipv4,ipv6,&nbits,&next);
444 if (type<0) {
445 debuga(_("While reading \"%s\"\n"),Filename);
446 exit(EXIT_FAILURE);
447 }
448
449 if (type==1) {
450 Alias_StoreName(name,next);
451 } else if (type==2) {
452 Alias_StoreIpv4(ipv4,nbits,next);
453 } else if (type==3) {
454 Alias_StoreIpv6(ipv6,nbits,next);
455 }
456 }
457
458 longline_destroy(&line);
459 fclose(fi);
460
461 if (debug) {
462 struct hostalias_name *alias1;
463 struct hostalias_ipv4 *alias4;
464 struct hostalias_ipv6 *alias6;
465
466 debuga(_("List of host names to alias:\n"));
467 for (alias1=FirstAliasName ; alias1 ; alias1=alias1->Next) {
468 if (alias1->HostName_Prefix && alias1->HostName_Suffix)
469 debuga(_(" %s*%s => %s\n"),alias1->HostName_Prefix,alias1->HostName_Suffix,alias1->Alias);
470 else if (alias1->HostName_Prefix)
471 debuga(_(" %s => %s\n"),alias1->HostName_Prefix,alias1->Alias);
472 else
473 debuga(_(" *%s => %s\n"),alias1->HostName_Suffix,alias1->Alias);
474 }
475 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=alias4->Next) {
476 debuga(_(" %d.%d.%d.%d/%d => %s\n"),alias4->Ip[0],alias4->Ip[1],alias4->Ip[2],alias4->Ip[3],alias4->NBits,alias4->Alias);
477 }
478 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=alias6->Next) {
479 debuga(_(" %x:%x:%x:%x:%x:%x:%x:%x/%d => %s\n"),alias6->Ip[0],alias6->Ip[1],alias6->Ip[2],alias6->Ip[3],
480 alias6->Ip[4],alias6->Ip[5],alias6->Ip[6],alias6->Ip[7],alias6->NBits,alias6->Alias);
481 }
482 }
483 }
484
485 /*!
486 Free the memory allocated by read_hostalias().
487 */
488 void free_hostalias(void)
489 {
490 struct hostalias_name *alias1;
491 struct hostalias_name *next1;
492 struct hostalias_ipv4 *alias4;
493 struct hostalias_ipv4 *next4;
494 struct hostalias_ipv6 *alias6;
495 struct hostalias_ipv6 *next6;
496
497 for (alias1=FirstAliasName ; alias1 ; alias1=next1) {
498 next1=alias1->Next;
499 if (alias1->HostName_Prefix) free((void *)alias1->HostName_Prefix);
500 if (alias1->HostName_Suffix) free((void *)alias1->HostName_Suffix);
501 free((void *)alias1->Alias);
502 free(alias1);
503 }
504 FirstAliasName=NULL;
505 for (alias4=FirstAliasIpv4 ; alias4 ; alias4=next4) {
506 next4=alias4->Next;
507 free((void *)alias4->Alias);
508 free(alias4);
509 }
510 FirstAliasIpv4=NULL;
511 for (alias6=FirstAliasIpv6 ; alias6 ; alias6=next6) {
512 next6=alias6->Next;
513 free((void *)alias6->Alias);
514 free(alias6);
515 }
516 FirstAliasIpv6=NULL;
517 }
518
519 /*!
520 Replace the host name by its alias if it is in our list.
521
522 \param url The host name.
523
524 \return The pointer to the host name or its alias.
525 */
526 const char *alias_url_name(const char *url,const char *next)
527 {
528 struct hostalias_name *alias;
529 int len;
530
531 len=(int)(next-url);
532 for (alias=FirstAliasName ; alias ; alias=alias->Next) {
533 if (len<alias->MinLen) continue;
534 if (alias->HostName_Prefix) {
535 if (alias->HostName_Suffix) {
536 if (strncasecmp(url,alias->HostName_Prefix,alias->PrefixLen)==0 &&
537 strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
538 return(alias->Alias);
539 }
540 } else {
541 if (len==alias->PrefixLen && strncasecmp(url,alias->HostName_Prefix,len)==0) {
542 return(alias->Alias);
543 }
544 }
545 } else if (strncasecmp(url+(len-alias->SuffixLen),alias->HostName_Suffix,len)==0) {
546 return(alias->Alias);
547 }
548 }
549 return(url);
550 }
551
552 /*!
553 Replace the IPv4 address by its alias if it is in our list.
554
555 \param url The host name.
556 \param ipv4 The address.
557
558 \return The pointer to the host name or its alias.
559 */
560 const char *alias_url_ipv4(const char *url,unsigned char *ipv4)
561 {
562 struct hostalias_ipv4 *alias;
563 int len;
564
565 for (alias=FirstAliasIpv4 ; alias ; alias=alias->Next) {
566 len=alias->NBits;
567 if ((len<8 || memcmp(ipv4,alias->Ip,len/8)==0) && ((len%8)==0 || (ipv4[len/8] ^ alias->Ip[len/8]) & (0xFFU<<(8-len%8)))==0) {
568 return(alias->Alias);
569 }
570 }
571 return(url);
572 }
573
574 /*!
575 Replace the IPv6 address by its alias if it is in our list.
576
577 \param url The host name.
578 \param ipv6 The address.
579
580 \return The pointer to the host name or its alias.
581 */
582 const char *alias_url_ipv6(const char *url,unsigned short int *ipv6)
583 {
584 struct hostalias_ipv6 *alias;
585 int len;
586 int i;
587
588 for (alias=FirstAliasIpv6 ; alias ; alias=alias->Next) {
589 len=alias->NBits;
590 for (i=len/16-1 ; i>=0 && ipv6[i]==alias->Ip[i] ; i--);
591 if (i>=0) continue;
592 i=len/16;
593 if (i>=8 || len%16==0 || ((ipv6[i] ^ alias->Ip[i]) & (0xFFFF<<(len-i*16)))==0) {
594 return(alias->Alias);
595 }
596 }
597 return(url);
598 }
599
/*!
Find the beginning of the URL beyond the scheme://

Any additional slash following the scheme:// is skipped too. The URL is
returned unchanged if it doesn't begin with a scheme followed by ://.

\param url The url possibly containing a scheme.

\return The beginning of the url beyond the scheme.
*/
const char *skip_scheme(const char *url)
{
	const char *str;

	/*
	Skip any scheme:// at the beginning of the URL (see rfc2396 section 3.1).
	The underscore is not part of the standard but is found in the squid logs as cache_object://.
	The character is converted to unsigned char because isalnum() is not defined for negative values.
	*/
	for (str=url ; *str && (isalnum((unsigned char)*str) || *str=='+' || *str=='-' || *str=='.' || *str=='_') ; str++);
	if (str[0]==':' && str[1]=='/' && str[2]=='/') {
		url=str+3;
		while (*url=='/') url++;
	}
	return(url);
}
622
623 /*!
624 Get the part of the URL necessary to generate the report.
625
626 \param url The URL as extracted from the report.
627 \param full_url \c True to keep the whole URL. If \c false,
628 the URL is truncated to only keep the host name and port number.
629 */
630 const char *process_url(char *url,bool full_url)
631 {
632 char *str;
633 const char *start;
634 int type;
635 const char *address;
636 unsigned char ipv4[4];
637 unsigned short int ipv6[8];
638 const char *next;
639
640 start=skip_scheme(url);
641 if (!full_url) {
642 for (str=(char *)start ; *str && *str!='/' && *str!='?' ; str++);
643 *str='\0';
644 type=extract_address_mask(start,&address,ipv4,ipv6,NULL,&next);
645 if (type==1) {
646 if (FirstAliasName)
647 start=alias_url_name(start,next);
648 } else if (type==2) {
649 if (FirstAliasIpv4)
650 start=alias_url_ipv4(start,ipv4);
651 } else if (type==3) {
652 if (FirstAliasIpv6)
653 start=alias_url_ipv6(start,ipv6);
654 }
655 }
656 return(start);
657 }
658
/*!
Extract the host name from the URL.

\param url The url whose host name must be extracted.
\param hostname The buffer to store the host name.
\param hostsize The size of the host name buffer.

Nothing is written if there is no buffer or if its size is not at least one
byte. Otherwise, the output is always terminated and is truncated to fit in
the buffer.

\note The function is stupid at this time. It just searches for the first slash
in the URL and truncates the URL there. It doesn't take the protocol into account
nor the port number nor any user or password information.
*/
void url_hostname(const char *url,char *hostname,int hostsize)
{
	int i;

	// without room for the terminating zero, nothing can be stored safely
	if (!hostname || hostsize<=0) return;

	hostsize--;
	for (i=0 ; url && i<hostsize && url[i] && url[i]!='/' ; i++)
		hostname[i]=url[i];
	hostname[i]='\0';
}
679