]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Cleanup various code to use debugs() instead of debug()()
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * $Id: url.cc,v 1.165 2008/02/03 10:00:30 amosjeffries Exp $
4 *
5 * DEBUG: section 23 URL Parsing
6 * AUTHOR: Duane Wessels
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "URL.h"
37 #include "HttpRequest.h"
38 #include "URLScheme.h"
39
40 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn);
/*
 * Character sets used by urlParse() to validate hostnames with strspn().
 * The "_u" variant additionally accepts '_' and is selected when
 * Config.onoff.allow_underscore is set.  When built with USE_IPV6 both
 * sets also accept the characters '[', ':' and ']'.
 */
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-._"
#if USE_IPV6
    "[:]"
#endif
    ;

static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
#if USE_IPV6
    "[:]"
#endif
    ;
57
58 void
59 urlInitialize(void)
60 {
61 debugs(23, 5, "urlInitialize: Initializing...");
62 /* this ensures that the number of protocol strings is the same as
63 * the enum slots allocated because the last enum is always 'TOTAL'.
64 */
65 assert(strcmp(ProtocolStr[PROTO_MAX], "TOTAL") == 0);
66 /*
67 * These test that our matchDomainName() function works the
68 * way we expect it to.
69 */
70 assert(0 == matchDomainName("foo.com", "foo.com"));
71 assert(0 == matchDomainName(".foo.com", "foo.com"));
72 assert(0 == matchDomainName("foo.com", ".foo.com"));
73 assert(0 == matchDomainName(".foo.com", ".foo.com"));
74 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
75 assert(0 != matchDomainName("x.foo.com", "foo.com"));
76 assert(0 != matchDomainName("foo.com", "x.foo.com"));
77 assert(0 != matchDomainName("bar.com", "foo.com"));
78 assert(0 != matchDomainName(".bar.com", "foo.com"));
79 assert(0 != matchDomainName(".bar.com", ".foo.com"));
80 assert(0 != matchDomainName("bar.com", ".foo.com"));
81 assert(0 < matchDomainName("zzz.com", "foo.com"));
82 assert(0 > matchDomainName("aaa.com", "foo.com"));
83 assert(0 == matchDomainName("FOO.com", "foo.COM"));
84 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
85 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
86 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
87 /* more cases? */
88 }
89
90 /**
91 * urlParseProtocol() takes begin (b) and end (e) pointers, but for
92 * backwards compatibility, e defaults to NULL, in which case we
93 * assume b is NULL-terminated.
94 */
95 protocol_t
96 urlParseProtocol(const char *b, const char *e)
97 {
98 /*
99 * if e is NULL, b must be NULL terminated and we
100 * make e point to the first whitespace character
101 * after b.
102 */
103
104 if (NULL == e)
105 e = b + strcspn(b, ":");
106
107 int len = e - b;
108
109 /* test common stuff first */
110
111 if (strncasecmp(b, "http", len) == 0)
112 return PROTO_HTTP;
113
114 if (strncasecmp(b, "ftp", len) == 0)
115 return PROTO_FTP;
116
117 if (strncasecmp(b, "https", len) == 0)
118 return PROTO_HTTPS;
119
120 if (strncasecmp(b, "file", len) == 0)
121 return PROTO_FTP;
122
123 if (strncasecmp(b, "gopher", len) == 0)
124 return PROTO_GOPHER;
125
126 if (strncasecmp(b, "wais", len) == 0)
127 return PROTO_WAIS;
128
129 if (strncasecmp(b, "cache_object", len) == 0)
130 return PROTO_CACHEOBJ;
131
132 if (strncasecmp(b, "urn", len) == 0)
133 return PROTO_URN;
134
135 if (strncasecmp(b, "whois", len) == 0)
136 return PROTO_WHOIS;
137
138 if (strncasecmp(b, "internal", len) == 0)
139 return PROTO_INTERNAL;
140
141 return PROTO_NONE;
142 }
143
144 int
145 urlDefaultPort(protocol_t p)
146 {
147 switch (p) {
148
149 case PROTO_HTTP:
150 return 80;
151
152 case PROTO_HTTPS:
153 return 443;
154
155 case PROTO_FTP:
156 return 21;
157
158 case PROTO_GOPHER:
159 return 70;
160
161 case PROTO_WAIS:
162 return 210;
163
164 case PROTO_CACHEOBJ:
165
166 case PROTO_INTERNAL:
167 return CACHE_HTTP_PORT;
168
169 case PROTO_WHOIS:
170 return 43;
171
172 default:
173 return 0;
174 }
175 }
176
177 /*
178 * Parse a URI/URL.
179 *
180 * If the 'request' arg is non-NULL, put parsed values there instead
181 * of allocating a new HttpRequest.
182 *
183 * This abuses HttpRequest as a way of representing the parsed url
184 * and its components.
185 * method is used to switch parsers and to init the HttpRequest.
186 * If method is METHOD_CONNECT, then rather than a URL a hostname:port is
187 * looked for.
188 * The url is non const so that if its too long we can NULL-terminate it in place.
189 */
190
191 /*
192 * This routine parses a URL. Its assumed that the URL is complete -
193 * ie, the end of the string is the end of the URL. Don't pass a partial
194 * URL here as this routine doesn't have any way of knowing whether
195 * its partial or not (ie, it handles the case of no trailing slash as
196 * being "end of host with implied path of /".
197 */
198 HttpRequest *
199 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
200 {
201 LOCAL_ARRAY(char, proto, MAX_URL);
202 LOCAL_ARRAY(char, login, MAX_URL);
203 LOCAL_ARRAY(char, host, MAX_URL);
204 LOCAL_ARRAY(char, urlpath, MAX_URL);
205 char *t = NULL;
206 char *q = NULL;
207 int port;
208 protocol_t protocol = PROTO_NONE;
209 int l;
210 int i;
211 const char *src;
212 char *dst;
213 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
214
215 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
216 /* terminate so it doesn't overflow other buffers */
217 *(url + (MAX_URL >> 1)) = '\0';
218 debugs(23, 1, "urlParse: URL too large (" << l << " bytes)");
219 return NULL;
220 }
221 if (method == METHOD_CONNECT) {
222 port = CONNECT_PORT;
223
224 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
225 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
226 return NULL;
227
228 } else if (!strncmp(url, "urn:", 4)) {
229 return urnParse(method, url);
230 } else {
231 /* Parse the URL: */
232 src = url;
233 i = 0;
234 /* Find first : - everything before is protocol */
235 for (i = 0, dst = proto; i < l && *src != ':'; i++, src++, dst++) {
236 *dst = *src;
237 }
238 if (i >= l)
239 return NULL;
240 *dst = '\0';
241
242 /* Then its :// */
243 /* (XXX yah, I'm not checking we've got enough data left before checking the array..) */
244 if (*src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
245 return NULL;
246 i += 3;
247 src += 3;
248
249 /* Then everything until first /; thats host (and port; which we'll look for here later) */
250 /* bug 1881: If we don't get a "/" then we imply it was there */
251 for (dst = host; i < l && *src != '/' && src != '\0'; i++, src++, dst++) {
252 *dst = *src;
253 }
254
255 /*
256 * We can't check for "i >= l" here because we could be at the end of the line
257 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
258 * been -given- a valid URL and the path is just '/'.
259 */
260 if (i > l)
261 return NULL;
262 *dst = '\0';
263
264 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
265 for (dst = urlpath; i < l && *src != '\r' && *src != '\n' && *src != '\0'; i++, src++, dst++) {
266 *dst = *src;
267 }
268
269 /* We -could- be at the end of the buffer here */
270 if (i > l)
271 return NULL;
272 /* If the URL path is empty we set it to be "/" */
273 if (dst == urlpath) {
274 *(dst++) = '/';
275 }
276 *dst = '\0';
277
278 protocol = urlParseProtocol(proto);
279 port = urlDefaultPort(protocol);
280
281 /* Is there any login information? (we should eventually parse it above) */
282 if ((t = strrchr(host, '@'))) {
283 strcpy((char *) login, (char *) host);
284 t = strrchr(login, '@');
285 *t = 0;
286 strcpy((char *) host, t + 1);
287 }
288
289 /* Is there any host information? (we should eventually parse it above) */
290 if(*host == '[') {
291 /* strip any IPA brackets. valid under IPv6. */
292 dst = host;
293 #if USE_IPV6
294 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
295 src = host; src++;
296 l = strlen(host);
297 i = 1;
298 for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
299 *dst = *src;
300 }
301
302 /* we moved in-place, so truncate the actual hostname found */
303 *(dst++) = '\0';
304 #else
305 /* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
306 while(*dst != '\0' && *dst != ']') dst++;
307 #endif
308
309 /* skip ahead to either start of port, or original EOS */
310 while(*dst != '\0' && *dst != ':') dst++;
311 t = dst;
312 } else {
313 t = strrchr(host, ':');
314
315 if(t != strchr(host,':') ) {
316 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
317 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
318 /* therefore we MUST accept the case where they are not bracketed at all. */
319 t = NULL;
320 }
321 }
322
323 if (t && *t == ':') {
324 *t = '\0'; t++;
325 port = atoi(t);
326 }
327 }
328
329 for (t = host; *t; t++)
330 *t = xtolower(*t);
331
332 if (stringHasWhitespace(host)) {
333 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
334 t = q = host;
335 while (*t) {
336 if (!xisspace(*t))
337 *q++ = *t;
338 t++;
339 }
340 *q = '\0';
341 }
342 }
343
344 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
345
346 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
347 debugs(23, 1, "urlParse: Illegal character in hostname '" << host << "'");
348 return NULL;
349 }
350
351 if (Config.appendDomain && !strchr(host, '.'))
352 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
353
354 /* remove trailing dots from hostnames */
355 while ((l = strlen(host)) > 0 && host[--l] == '.')
356 host[l] = '\0';
357
358 /* reject duplicate or leading dots */
359 if (strstr(host, "..") || *host == '.') {
360 debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
361 return NULL;
362 }
363
364 if (port < 1 || port > 65535) {
365 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
366 return NULL;
367 }
368
369 #ifdef HARDCODE_DENY_PORTS
370 /* These ports are filtered in the default squid.conf, but
371 * maybe someone wants them hardcoded... */
372 if (port == 7 || port == 9 || port == 19) {
373 debugs(23, 0, "urlParse: Deny access to port " << port);
374 return NULL;
375 }
376 #endif
377
378 if (stringHasWhitespace(urlpath)) {
379 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
380
381 switch (Config.uri_whitespace) {
382
383 case URI_WHITESPACE_DENY:
384 return NULL;
385
386 case URI_WHITESPACE_ALLOW:
387 break;
388
389 case URI_WHITESPACE_ENCODE:
390 t = rfc1738_escape_unescaped(urlpath);
391 xstrncpy(urlpath, t, MAX_URL);
392 break;
393
394 case URI_WHITESPACE_CHOP:
395 *(urlpath + strcspn(urlpath, w_space)) = '\0';
396 break;
397
398 case URI_WHITESPACE_STRIP:
399 default:
400 t = q = urlpath;
401 while (*t) {
402 if (!xisspace(*t))
403 *q++ = *t;
404 t++;
405 }
406 *q = '\0';
407 }
408 }
409
410 if (NULL == request)
411 request = new HttpRequest(method, protocol, urlpath);
412 else {
413 request->initHTTP(method, protocol, urlpath);
414 }
415
416 request->SetHost(host);
417 xstrncpy(request->login, login, MAX_LOGIN_SZ);
418 request->port = (u_short) port;
419 return request;
420 }
421
422 static HttpRequest *
423 urnParse(const HttpRequestMethod& method, char *urn)
424 {
425 debugs(50, 5, "urnParse: " << urn);
426 return new HttpRequest(method, PROTO_URN, urn + 4);
427 }
428
429 const char *
430 urlCanonical(HttpRequest * request)
431 {
432 LOCAL_ARRAY(char, portbuf, 32);
433 /// \todo AYJ: Performance: making this a ptr and allocating when needed will be better than a write and future xstrdup().
434 LOCAL_ARRAY(char, urlbuf, MAX_URL);
435
436 if (request->canonical)
437 return request->canonical;
438
439 if (request->protocol == PROTO_URN) {
440 snprintf(urlbuf, MAX_URL, "urn:%s", request->urlpath.buf());
441 } else {
442 /// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
443 switch (request->method.id()) {
444
445 case METHOD_CONNECT:
446 snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
447 break;
448
449 default:
450 portbuf[0] = '\0';
451
452 if (request->port != urlDefaultPort(request->protocol))
453 snprintf(portbuf, 32, ":%d", request->port);
454
455 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
456 ProtocolStr[request->protocol],
457 request->login,
458 *request->login ? "@" : null_string,
459 request->GetHost(),
460 portbuf,
461 request->urlpath.buf());
462
463 break;
464 }
465 }
466
467 return (request->canonical = xstrdup(urlbuf));
468 }
469
470 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanoncical. But elides the query-string.
471 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
472 * and never copy the query-string part in the first place
473 */
474 char *
475 urlCanonicalClean(const HttpRequest * request)
476 {
477 LOCAL_ARRAY(char, buf, MAX_URL);
478 LOCAL_ARRAY(char, portbuf, 32);
479 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
480 char *t;
481
482 if (request->protocol == PROTO_URN) {
483 snprintf(buf, MAX_URL, "urn:%s", request->urlpath.buf());
484 } else {
485 /// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
486 switch (request->method.id()) {
487
488 case METHOD_CONNECT:
489 snprintf(buf, MAX_URL, "%s:%d",
490 request->GetHost(),
491 request->port);
492 break;
493
494 default:
495 portbuf[0] = '\0';
496
497 if (request->port != urlDefaultPort(request->protocol))
498 snprintf(portbuf, 32, ":%d", request->port);
499
500 loginbuf[0] = '\0';
501
502 if ((int) strlen(request->login) > 0) {
503 strcpy(loginbuf, request->login);
504
505 if ((t = strchr(loginbuf, ':')))
506 *t = '\0';
507
508 strcat(loginbuf, "@");
509 }
510
511 snprintf(buf, MAX_URL, "%s://%s%s%s%s",
512 ProtocolStr[request->protocol],
513 loginbuf,
514 request->GetHost(),
515 portbuf,
516 request->urlpath.buf());
517 /*
518 * strip arguments AFTER a question-mark
519 */
520
521 if (Config.onoff.strip_query_terms)
522 if ((t = strchr(buf, '?')))
523 *(++t) = '\0';
524
525 break;
526 }
527 }
528
529 if (stringHasCntl(buf))
530 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
531
532 return buf;
533 }
534
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/'
 * will appear before a ':'.  So the URL is relative unless the first
 * ':' or '/' found in it is a ':'.  A NULL or empty URL is reported as
 * not relative.
 */
bool
urlIsRelative(const char *url)
{
    if (url == NULL || *url == '\0')
        return false;

    /* index of the first ':' or '/', or of the terminator if neither occurs */
    const size_t firstDelim = strcspn(url, ":/");

    return url[firstDelim] != ':';
}
560
561 /*
562 * Convert a relative URL to an absolute URL using the context of a given
563 * request.
564 *
565 * It is assumed that you have already ensured that the URL is relative.
566 *
567 * If NULL is returned it is an indication that the method in use in the
568 * request does not distinguish between relative and absolute and you should
569 * use the url unchanged.
570 *
571 * If non-NULL is returned, it is up to the caller to free the resulting
572 * memory using safe_free().
573 */
574 char *
575 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
576 {
577
578 if (req->method.id() == METHOD_CONNECT) {
579 return (NULL);
580 }
581
582 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
583
584 if (req->protocol == PROTO_URN) {
585 snprintf(urlbuf, MAX_URL, "urn:%s", req->urlpath.buf());
586 return (urlbuf);
587 }
588
589 size_t urllen;
590
591 if (req->port != urlDefaultPort(req->protocol)) {
592 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
593 ProtocolStr[req->protocol],
594 req->login,
595 *req->login ? "@" : null_string,
596 req->GetHost(),
597 req->port
598 );
599 } else {
600 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
601 ProtocolStr[req->protocol],
602 req->login,
603 *req->login ? "@" : null_string,
604 req->GetHost()
605 );
606 }
607
608 if (relUrl[0] == '/') {
609 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
610 } else {
611 const char *path = req->urlpath.buf();
612 const char *last_slash = strrchr(path, '/');
613
614 if (last_slash == NULL) {
615 urlbuf[urllen++] = '/';
616 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
617 } else {
618 last_slash++;
619 size_t pathlen = last_slash - path;
620 if (pathlen > MAX_URL - urllen - 1) {
621 pathlen = MAX_URL - urllen - 1;
622 }
623 strncpy(&urlbuf[urllen], path, pathlen);
624 urllen += pathlen;
625 if (urllen + 1 < MAX_URL) {
626 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
627 }
628 }
629 }
630
631 return (urlbuf);
632 }
633
634 /*
635 * matchDomainName() compares a hostname with a domainname according
636 * to the following rules:
637 *
638 * HOST DOMAIN MATCH?
639 * ------------- ------------- ------
640 * foo.com foo.com YES
641 * .foo.com foo.com YES
642 * x.foo.com foo.com NO
643 * foo.com .foo.com YES
644 * .foo.com .foo.com YES
645 * x.foo.com .foo.com YES
646 *
647 * We strip leading dots on hosts (but not domains!) so that
648 * ".foo.com" is is always the same as "foo.com".
649 *
650 * Return values:
651 * 0 means the host matches the domain
652 * 1 means the host is greater than the domain
653 * -1 means the host is less than the domain
654 */
655
656 int
657 matchDomainName(const char *h, const char *d)
658 {
659 int dl;
660 int hl;
661
662 while ('.' == *h)
663 h++;
664
665 hl = strlen(h);
666
667 dl = strlen(d);
668
669 /*
670 * Start at the ends of the two strings and work towards the
671 * beginning.
672 */
673 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
674 if (hl == 0 && dl == 0) {
675 /*
676 * We made it all the way to the beginning of both
677 * strings without finding any difference.
678 */
679 return 0;
680 }
681
682 if (0 == hl) {
683 /*
684 * The host string is shorter than the domain string.
685 * There is only one case when this can be a match.
686 * If the domain is just one character longer, and if
687 * that character is a leading '.' then we call it a
688 * match.
689 */
690
691 if (1 == dl && '.' == d[0])
692 return 0;
693 else
694 return -1;
695 }
696
697 if (0 == dl) {
698 /*
699 * The domain string is shorter than the host string.
700 * This is a match only if the first domain character
701 * is a leading '.'.
702 */
703
704 if ('.' == d[0])
705 return 0;
706 else
707 return 1;
708 }
709 }
710
711 /*
712 * We found different characters in the same position (from the end).
713 */
714 /*
715 * If one of those character is '.' then its special. In order
716 * for splay tree sorting to work properly, "x-foo.com" must
717 * be greater than ".foo.com" even though '-' is less than '.'.
718 */
719 if ('.' == d[dl])
720 return 1;
721
722 if ('.' == h[hl])
723 return -1;
724
725 return (xtolower(h[hl]) - xtolower(d[dl]));
726 }
727
728
729 /*
730 * return true if we can serve requests for this method.
731 */
732 int
733 urlCheckRequest(const HttpRequest * r)
734 {
735 int rc = 0;
736 /* protocol "independent" methods
737 *
738 * actually these methods are specific to HTTP:
739 * they are methods we recieve on our HTTP port,
740 * and if we had a FTP listener would not be relevant
741 * there.
742 *
743 * So, we should delegate them to HTTP. The problem is that we
744 * do not have a default protocol from the client side of HTTP.
745 */
746
747 if (r->method == METHOD_CONNECT)
748 return 1;
749
750 if (r->method == METHOD_TRACE)
751 return 1;
752
753 if (r->method == METHOD_PURGE)
754 return 1;
755
756 /* does method match the protocol? */
757 switch (r->protocol) {
758
759 case PROTO_URN:
760
761 case PROTO_HTTP:
762
763 case PROTO_CACHEOBJ:
764 rc = 1;
765 break;
766
767 case PROTO_FTP:
768
769 if (r->method == METHOD_PUT)
770 rc = 1;
771
772 case PROTO_GOPHER:
773
774 case PROTO_WAIS:
775
776 case PROTO_WHOIS:
777 if (r->method == METHOD_GET)
778 rc = 1;
779 else if (r->method == METHOD_HEAD)
780 rc = 1;
781
782 break;
783
784 case PROTO_HTTPS:
785 #ifdef USE_SSL
786
787 rc = 1;
788
789 break;
790
791 #else
792 /*
793 * Squid can't originate an SSL connection, so it should
794 * never receive an "https:" URL. It should always be
795 * CONNECT instead.
796 */
797 rc = 0;
798
799 #endif
800
801 default:
802 break;
803 }
804
805 return rc;
806 }
807
808 /*
809 * Quick-n-dirty host extraction from a URL. Steps:
810 * Look for a colon
811 * Skip any '/' after the colon
812 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
813 * Look for an ending '/' or ':' and terminate
814 * Look for login info preceeded by '@'
815 */
816
817 class URLHostName
818 {
819
820 public:
821 char * extract(char const *url);
822
823 private:
824 static char Host [SQUIDHOSTNAMELEN];
825 void init(char const *);
826 void findHostStart();
827 void trimTrailingChars();
828 void trimAuth();
829 char const *hostStart;
830 char const *url;
831 };
832
833 char *
834 urlHostname(const char *url)
835 {
836 return URLHostName().extract(url);
837 }
838
839 char URLHostName::Host[SQUIDHOSTNAMELEN];
840
841 void
842 URLHostName::init(char const *aUrl)
843 {
844 Host[0] = '\0';
845 url = url;
846 }
847
848 void
849 URLHostName::findHostStart()
850 {
851 if (NULL == (hostStart = strchr(url, ':')))
852 return;
853
854 ++hostStart;
855
856 while (*hostStart != '\0' && *hostStart == '/')
857 ++hostStart;
858
859 #if USE_IPV6
860 if (*hostStart == ']')
861 ++hostStart;
862 #endif
863
864 }
865
866 void
867 URLHostName::trimTrailingChars()
868 {
869 char *t;
870
871 if ((t = strchr(Host, '/')))
872 *t = '\0';
873
874 if ((t = strrchr(Host, ':')))
875 *t = '\0';
876
877 #if USE_IPV6
878 if ((t = strchr(Host, ']')))
879 *t = '\0';
880 #endif
881
882 }
883
884 void
885 URLHostName::trimAuth()
886 {
887 char *t;
888
889 if ((t = strrchr(Host, '@'))) {
890 t++;
891 xmemmove(Host, t, strlen(t) + 1);
892 }
893 }
894
895 char *
896 URLHostName::extract(char const *aUrl)
897 {
898 init(aUrl);
899 findHostStart();
900
901 if (hostStart == NULL)
902 return NULL;
903
904 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
905
906 trimTrailingChars();
907
908 trimAuth();
909
910 return Host;
911 }
912
913 URL::URL() : scheme()
914 {}
915
916 URL::URL(URLScheme const &aScheme): scheme(aScheme)
917 {}