]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 23 URL Parsing */
13 #include "HttpRequest.h"
15 #include "SquidConfig.h"
16 #include "SquidString.h"
19 static HttpRequest
*urlParseFinish(const HttpRequestMethod
& method
,
20 const AnyP::ProtocolType protocol
,
21 const char *const protoStr
,
22 const char *const urlpath
,
23 const char *const host
,
26 HttpRequest
*request
);
27 static HttpRequest
*urnParse(const HttpRequestMethod
& method
, char *urn
, HttpRequest
*request
);
28 static const char valid_hostname_chars_u
[] =
29 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
30 "abcdefghijklmnopqrstuvwxyz"
34 static const char valid_hostname_chars
[] =
35 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
36 "abcdefghijklmnopqrstuvwxyz"
44 static SBuf
star("*");
51 static SBuf
slash("/");
56 URL::host(const char *src
)
60 if (hostAddr_
.isAnyAddr()) {
61 xstrncpy(host_
, src
, sizeof(host_
));
62 hostIsNumeric_
= false;
64 hostAddr_
.toHostStr(host_
, sizeof(host_
));
65 debugs(23, 3, "given IP: " << hostAddr_
);
74 // RFC 3986 section 3.3 says path can be empty (path-abempty).
75 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
76 // at least when sending and using. We must still accept path-abempty as input.
77 if (path_
.isEmpty() && (scheme_
== AnyP::PROTO_HTTP
|| scheme_
== AnyP::PROTO_HTTPS
))
86 debugs(23, 5, "urlInitialize: Initializing...");
87 /* this ensures that the number of protocol strings is the same as
88 * the enum slots allocated because the last enum is always 'MAX'.
90 assert(strcmp(AnyP::ProtocolType_str
[AnyP::PROTO_MAX
], "MAX") == 0);
92 * These test that our matchDomainName() function works the
93 * way we expect it to.
95 assert(0 == matchDomainName("foo.com", "foo.com"));
96 assert(0 == matchDomainName(".foo.com", "foo.com"));
97 assert(0 == matchDomainName("foo.com", ".foo.com"));
98 assert(0 == matchDomainName(".foo.com", ".foo.com"));
99 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
100 assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
101 assert(0 != matchDomainName("x.foo.com", "foo.com"));
102 assert(0 != matchDomainName("foo.com", "x.foo.com"));
103 assert(0 != matchDomainName("bar.com", "foo.com"));
104 assert(0 != matchDomainName(".bar.com", "foo.com"));
105 assert(0 != matchDomainName(".bar.com", ".foo.com"));
106 assert(0 != matchDomainName("bar.com", ".foo.com"));
107 assert(0 < matchDomainName("zzz.com", "foo.com"));
108 assert(0 > matchDomainName("aaa.com", "foo.com"));
109 assert(0 == matchDomainName("FOO.com", "foo.COM"));
110 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
111 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
112 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
114 assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains
));
115 assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
116 assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
117 assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
119 assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards
));
120 assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards
));
121 assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards
));
122 assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards
));
128 * Parse the scheme name from string b, into protocol type.
129 * The string must be 0-terminated.
132 urlParseProtocol(const char *b
)
134 // make e point to the ':' character
135 const char *e
= b
+ strcspn(b
, ":");
138 /* test common stuff first */
140 if (strncasecmp(b
, "http", len
) == 0)
141 return AnyP::PROTO_HTTP
;
143 if (strncasecmp(b
, "ftp", len
) == 0)
144 return AnyP::PROTO_FTP
;
146 if (strncasecmp(b
, "https", len
) == 0)
147 return AnyP::PROTO_HTTPS
;
149 if (strncasecmp(b
, "file", len
) == 0)
150 return AnyP::PROTO_FTP
;
152 if (strncasecmp(b
, "coap", len
) == 0)
153 return AnyP::PROTO_COAP
;
155 if (strncasecmp(b
, "coaps", len
) == 0)
156 return AnyP::PROTO_COAPS
;
158 if (strncasecmp(b
, "gopher", len
) == 0)
159 return AnyP::PROTO_GOPHER
;
161 if (strncasecmp(b
, "wais", len
) == 0)
162 return AnyP::PROTO_WAIS
;
164 if (strncasecmp(b
, "cache_object", len
) == 0)
165 return AnyP::PROTO_CACHE_OBJECT
;
167 if (strncasecmp(b
, "urn", len
) == 0)
168 return AnyP::PROTO_URN
;
170 if (strncasecmp(b
, "whois", len
) == 0)
171 return AnyP::PROTO_WHOIS
;
174 return AnyP::PROTO_UNKNOWN
;
176 return AnyP::PROTO_NONE
;
182 * If the 'request' arg is non-NULL, put parsed values there instead
183 * of allocating a new HttpRequest.
185 * This abuses HttpRequest as a way of representing the parsed url
186 * and its components.
187 * method is used to switch parsers and to init the HttpRequest.
188 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
190 * The url is non-const so that if it's too long we can NULL-terminate it in place.
194 * This routine parses a URL. It's assumed that the URL is complete -
195 * i.e., the end of the string is the end of the URL. Don't pass a partial
196 * URL here as this routine doesn't have any way of knowing whether
197 * it's partial or not (i.e., it handles the case of no trailing slash as
198 * being "end of host with implied path of /").
201 urlParse(const HttpRequestMethod
& method
, char *url
, HttpRequest
*request
)
203 LOCAL_ARRAY(char, proto
, MAX_URL
);
204 LOCAL_ARRAY(char, login
, MAX_URL
);
205 LOCAL_ARRAY(char, host
, MAX_URL
);
206 LOCAL_ARRAY(char, urlpath
, MAX_URL
);
210 AnyP::ProtocolType protocol
= AnyP::PROTO_NONE
;
215 proto
[0] = host
[0] = urlpath
[0] = login
[0] = '\0';
217 if ((l
= strlen(url
)) + Config
.appendDomainLen
> (MAX_URL
- 1)) {
218 /* terminate so it doesn't overflow other buffers */
219 *(url
+ (MAX_URL
>> 1)) = '\0';
220 debugs(23, DBG_IMPORTANT
, "urlParse: URL too large (" << l
<< " bytes)");
223 if (method
== Http::METHOD_CONNECT
) {
226 if (sscanf(url
, "[%[^]]]:%d", host
, &port
) < 1)
227 if (sscanf(url
, "%[^:]:%d", host
, &port
) < 1)
230 } else if ((method
== Http::METHOD_OPTIONS
|| method
== Http::METHOD_TRACE
) &&
231 URL::Asterisk().cmp(url
) == 0) {
232 protocol
= AnyP::PROTO_HTTP
;
233 port
= 80; // or the slow way ... AnyP::UriScheme(protocol,"http").defaultPort();
234 return urlParseFinish(method
, protocol
, "http", url
, host
, SBuf(), port
, request
);
235 } else if (!strncmp(url
, "urn:", 4)) {
236 return urnParse(method
, url
, request
);
241 /* Find first : - everything before is protocol */
242 for (i
= 0, dst
= proto
; i
< l
&& *src
!= ':'; ++i
, ++src
, ++dst
) {
250 if ((i
+3) > l
|| *src
!= ':' || *(src
+ 1) != '/' || *(src
+ 2) != '/')
255 /* Then everything until first /; that's the host (and port; which we'll look for here later) */
256 // bug 1881: If we don't get a "/" then we imply it was there
257 // bug 3074: We could just be given a "?" or "#". These also imply "/"
258 // bug 3233: whitespace is also a hostname delimiter.
259 for (dst
= host
; i
< l
&& *src
!= '/' && *src
!= '?' && *src
!= '#' && *src
!= '\0' && !xisspace(*src
); ++i
, ++src
, ++dst
) {
264 * We can't check for "i >= l" here because we could be at the end of the line
265 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
266 * been -given- a valid URL and the path is just '/'.
272 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
273 if (*src
== '?' || *src
== '#' || *src
== '\0') {
279 /* Then everything from / (inclusive) until \r\n or \0 - that's the urlpath */
280 for (; i
< l
&& *src
!= '\r' && *src
!= '\n' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
284 /* We -could- be at the end of the buffer here */
287 /* If the URL path is empty we set it to be "/" */
288 if (dst
== urlpath
) {
294 protocol
= urlParseProtocol(proto
);
295 port
= AnyP::UriScheme(protocol
).defaultPort();
297 /* Is there any login information? (we should eventually parse it above) */
298 t
= strrchr(host
, '@');
300 strncpy((char *) login
, (char *) host
, sizeof(login
)-1);
301 login
[sizeof(login
)-1] = '\0';
302 t
= strrchr(login
, '@');
304 strncpy((char *) host
, t
+ 1, sizeof(host
)-1);
305 host
[sizeof(host
)-1] = '\0';
306 // Bug 4498: URL-unescape the login info after extraction
307 rfc1738_unescape(login
);
310 /* Is there any host information? (we should eventually parse it above) */
312 /* strip any IPA brackets. valid under IPv6. */
314 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
319 for (; i
< l
&& *src
!= ']' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
323 /* we moved in-place, so truncate the actual hostname found */
327 /* skip ahead to either start of port, or original EOS */
328 while (*dst
!= '\0' && *dst
!= ':')
332 t
= strrchr(host
, ':');
334 if (t
!= strchr(host
,':') ) {
335 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when it's not. */
336 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
337 /* therefore we MUST accept the case where they are not bracketed at all. */
342 // Bug 3183 sanity check: If scheme is present, host must be too.
343 if (protocol
!= AnyP::PROTO_NONE
&& host
[0] == '\0') {
344 debugs(23, DBG_IMPORTANT
, "SECURITY ALERT: Missing hostname in URL '" << url
<< "'. see access.log for details.");
348 if (t
&& *t
== ':') {
355 for (t
= host
; *t
; ++t
)
358 if (stringHasWhitespace(host
)) {
359 if (URI_WHITESPACE_STRIP
== Config
.uri_whitespace
) {
372 debugs(23, 3, "urlParse: Split URL '" << url
<< "' into proto='" << proto
<< "', host='" << host
<< "', port='" << port
<< "', path='" << urlpath
<< "'");
374 if (Config
.onoff
.check_hostnames
&& strspn(host
, Config
.onoff
.allow_underscore
? valid_hostname_chars_u
: valid_hostname_chars
) != strlen(host
)) {
375 debugs(23, DBG_IMPORTANT
, "urlParse: Illegal character in hostname '" << host
<< "'");
379 /* For IPV6 addresses also check for a colon */
380 if (Config
.appendDomain
&& !strchr(host
, '.') && !strchr(host
, ':'))
381 strncat(host
, Config
.appendDomain
, SQUIDHOSTNAMELEN
- strlen(host
) - 1);
383 /* remove trailing dots from hostnames */
384 while ((l
= strlen(host
)) > 0 && host
[--l
] == '.')
387 /* reject duplicate or leading dots */
388 if (strstr(host
, "..") || *host
== '.') {
389 debugs(23, DBG_IMPORTANT
, "urlParse: Illegal hostname '" << host
<< "'");
393 if (port
< 1 || port
> 65535) {
394 debugs(23, 3, "urlParse: Invalid port '" << port
<< "'");
398 #if HARDCODE_DENY_PORTS
399 /* These ports are filtered in the default squid.conf, but
400 * maybe someone wants them hardcoded... */
401 if (port
== 7 || port
== 9 || port
== 19) {
402 debugs(23, DBG_CRITICAL
, "urlParse: Deny access to port " << port
);
407 if (stringHasWhitespace(urlpath
)) {
408 debugs(23, 2, "urlParse: URI has whitespace: {" << url
<< "}");
410 switch (Config
.uri_whitespace
) {
412 case URI_WHITESPACE_DENY
:
415 case URI_WHITESPACE_ALLOW
:
418 case URI_WHITESPACE_ENCODE
:
419 t
= rfc1738_escape_unescaped(urlpath
);
420 xstrncpy(urlpath
, t
, MAX_URL
);
423 case URI_WHITESPACE_CHOP
:
424 *(urlpath
+ strcspn(urlpath
, w_space
)) = '\0';
427 case URI_WHITESPACE_STRIP
:
441 return urlParseFinish(method
, protocol
, proto
, urlpath
, host
, SBuf(login
), port
, request
);
445 * Update request with parsed URI data. If the request arg is
446 * non-NULL, put parsed values there instead of allocating a new
450 urlParseFinish(const HttpRequestMethod
& method
,
451 const AnyP::ProtocolType protocol
,
452 const char *const protoStr
, // for unknown protocols
453 const char *const urlpath
,
454 const char *const host
,
457 HttpRequest
*request
)
460 request
= new HttpRequest(method
, protocol
, protoStr
, urlpath
);
462 request
->initHTTP(method
, protocol
, protoStr
, urlpath
);
465 request
->url
.host(host
);
466 request
->url
.userInfo(login
);
467 request
->url
.port(port
);
472 urnParse(const HttpRequestMethod
& method
, char *urn
, HttpRequest
*request
)
474 debugs(50, 5, "urnParse: " << urn
);
476 request
->initHTTP(method
, AnyP::PROTO_URN
, "urn", urn
+ 4);
480 return new HttpRequest(method
, AnyP::PROTO_URN
, "urn", urn
+ 4);
487 authorityHttp_
.clear();
488 authorityWithPort_
.clear();
492 URL::authority(bool requirePort
) const
494 if (authorityHttp_
.isEmpty()) {
496 // both formats contain Host/IP
497 authorityWithPort_
.append(host());
498 authorityHttp_
= authorityWithPort_
;
500 // authorityForm_ only has :port if it is non-default
501 authorityWithPort_
.appendf(":%u",port());
502 if (port() != getScheme().defaultPort())
503 authorityHttp_
= authorityWithPort_
;
506 return requirePort
? authorityWithPort_
: authorityHttp_
;
510 URL::absolute() const
512 if (absolute_
.isEmpty()) {
513 // TODO: most URL will be much shorter, avoid allocating this much
514 absolute_
.reserveCapacity(MAX_URL
);
516 absolute_
.append(getScheme().image());
517 absolute_
.append(":",1);
518 if (getScheme() != AnyP::PROTO_URN
) {
519 absolute_
.append("//", 2);
520 const bool omitUserInfo
= getScheme() == AnyP::PROTO_HTTP
||
521 getScheme() != AnyP::PROTO_HTTPS
||
522 userInfo().isEmpty();
524 absolute_
.append(userInfo());
525 absolute_
.append("@", 1);
527 absolute_
.append(authority());
529 absolute_
.append(path());
535 /** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
536 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
537 * and never copy the query-string part in the first place
540 urlCanonicalClean(const HttpRequest
* request
)
542 LOCAL_ARRAY(char, buf
, MAX_URL
);
544 snprintf(buf
, sizeof(buf
), SQUIDSBUFPH
, SQUIDSBUFPRINT(request
->effectiveRequestUri()));
545 buf
[sizeof(buf
)-1] = '\0';
547 // URN, CONNECT method, and non-stripped URIs can go straight out
548 if (Config
.onoff
.strip_query_terms
&& !(request
->method
== Http::METHOD_CONNECT
|| request
->url
.getScheme() == AnyP::PROTO_URN
)) {
549 // strip anything AFTER a question-mark
550 // leaving the '?' in place
551 if (auto t
= strchr(buf
, '?')) {
556 if (stringHasCntl(buf
))
557 xstrncpy(buf
, rfc1738_escape_unescaped(buf
), MAX_URL
);
563 * Yet another alternative to urlCanonical.
564 * This one adds the https:// parts to Http::METHOD_CONNECT URL
565 * for use in error page outputs.
566 * Luckily we can leverage the others instead of duplicating.
569 urlCanonicalFakeHttps(const HttpRequest
* request
)
571 LOCAL_ARRAY(char, buf
, MAX_URL
);
573 // method CONNECT and port HTTPS
574 if (request
->method
== Http::METHOD_CONNECT
&& request
->url
.port() == 443) {
575 snprintf(buf
, MAX_URL
, "https://%s/*", request
->url
.host());
579 // else do the normal complete canonical thing.
580 return urlCanonicalClean(request
);
584 * Test if a URL is relative.
586 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
587 * appear before a ':'.
590 urlIsRelative(const char *url
)
601 for (p
= url
; *p
!= '\0' && *p
!= ':' && *p
!= '/'; ++p
);
610 * Convert a relative URL to an absolute URL using the context of a given
613 * It is assumed that you have already ensured that the URL is relative.
615 * If NULL is returned it is an indication that the method in use in the
616 * request does not distinguish between relative and absolute and you should
617 * use the url unchanged.
619 * If non-NULL is returned, it is up to the caller to free the resulting
620 * memory using safe_free().
623 urlMakeAbsolute(const HttpRequest
* req
, const char *relUrl
)
626 if (req
->method
.id() == Http::METHOD_CONNECT
) {
630 char *urlbuf
= (char *)xmalloc(MAX_URL
* sizeof(char));
632 if (req
->url
.getScheme() == AnyP::PROTO_URN
) {
633 // XXX: this is what the original code did, but it seems to break the
634 // intended behaviour of this function. It returns the stored URN path,
635 // not converting the given one into a URN...
636 snprintf(urlbuf
, MAX_URL
, SQUIDSBUFPH
, SQUIDSBUFPRINT(req
->url
.absolute()));
640 SBuf authorityForm
= req
->url
.authority(); // host[:port]
641 const SBuf
&scheme
= req
->url
.getScheme().image();
642 size_t urllen
= snprintf(urlbuf
, MAX_URL
, SQUIDSBUFPH
"://" SQUIDSBUFPH
"%s" SQUIDSBUFPH
,
643 SQUIDSBUFPRINT(scheme
),
644 SQUIDSBUFPRINT(req
->url
.userInfo()),
645 !req
->url
.userInfo().isEmpty() ? "@" : "",
646 SQUIDSBUFPRINT(authorityForm
));
648 // if the first char is '/' assume it's a relative path
649 // XXX: this breaks on scheme-relative URLs,
650 // but we should not see those outside ESI, and rarely there.
651 // XXX: also breaks on any URL containing a '/' in the query-string portion
652 if (relUrl
[0] == '/') {
653 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
655 SBuf path
= req
->url
.path();
656 SBuf::size_type lastSlashPos
= path
.rfind('/');
658 if (lastSlashPos
== SBuf::npos
) {
659 // replace the whole path with the given bit(s)
660 urlbuf
[urllen
] = '/';
662 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
664 // replace only the last (file?) segment with the given bit(s)
666 if (lastSlashPos
> MAX_URL
- urllen
- 1) {
667 // XXX: crops bits in the middle of the combined URL.
668 lastSlashPos
= MAX_URL
- urllen
- 1;
670 SBufToCstring(&urlbuf
[urllen
], path
.substr(0,lastSlashPos
));
671 urllen
+= lastSlashPos
;
672 if (urllen
+ 1 < MAX_URL
) {
673 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
682 matchDomainName(const char *h
, const char *d
, uint flags
)
687 const bool hostIncludesSubdomains
= (*h
== '.');
699 * Start at the ends of the two strings and work towards the
702 while (xtolower(h
[--hl
]) == xtolower(d
[--dl
])) {
703 if (hl
== 0 && dl
== 0) {
705 * We made it all the way to the beginning of both
706 * strings without finding any difference.
713 * The host string is shorter than the domain string.
714 * There is only one case when this can be a match.
715 * If the domain is just one character longer, and if
716 * that character is a leading '.' then we call it a
720 if (1 == dl
&& '.' == d
[0])
728 * The domain string is shorter than the host string.
729 * This is a match only if the first domain character
734 if (flags
& mdnRejectSubsubDomains
) {
735 // Check for sub-sub domain and reject
736 while(--hl
>= 0 && h
[hl
] != '.');
738 // No sub-sub domain found, but reject if there is a
739 // leading dot in given host string (which is removed
740 // before the check is started).
741 return hostIncludesSubdomains
? 1 : 0;
743 return 1; // sub-sub domain, reject
752 * We found different characters in the same position (from the end).
755 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
756 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
757 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
758 if ((flags
& mdnHonorWildcards
) && h
[hl
] == '*' && h
[hl
+ 1] == '.')
762 * If one of those characters is '.' then it's special. In order
763 * for splay tree sorting to work properly, "x-foo.com" must
764 * be greater than ".foo.com" even though '-' is less than '.'.
772 return (xtolower(h
[hl
]) - xtolower(d
[dl
]));
776 * return true if we can serve requests for this method.
779 urlCheckRequest(const HttpRequest
* r
)
782 /* protocol "independent" methods
784 * actually these methods are specific to HTTP:
785 * they are methods we receive on our HTTP port,
786 * and if we had an FTP listener would not be relevant
789 * So, we should delegate them to HTTP. The problem is that we
790 * do not have a default protocol from the client side of HTTP.
793 if (r
->method
== Http::METHOD_CONNECT
)
796 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
797 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
798 if (r
->method
== Http::METHOD_OPTIONS
|| r
->method
== Http::METHOD_TRACE
)
799 return (r
->header
.getInt64(Http::HdrType::MAX_FORWARDS
) == 0 || r
->url
.path() != URL::Asterisk());
801 if (r
->method
== Http::METHOD_PURGE
)
804 /* does method match the protocol? */
805 switch (r
->url
.getScheme()) {
807 case AnyP::PROTO_URN
:
809 case AnyP::PROTO_HTTP
:
811 case AnyP::PROTO_CACHE_OBJECT
:
815 case AnyP::PROTO_FTP
:
817 if (r
->method
== Http::METHOD_PUT
)
820 case AnyP::PROTO_GOPHER
:
822 case AnyP::PROTO_WAIS
:
824 case AnyP::PROTO_WHOIS
:
825 if (r
->method
== Http::METHOD_GET
)
827 else if (r
->method
== Http::METHOD_HEAD
)
832 case AnyP::PROTO_HTTPS
:
837 * Squid can't originate an SSL connection, so it should
838 * never receive an "https:" URL. It should always be
853 * Quick-n-dirty host extraction from a URL. Steps:
855 * Skip any '/' after the colon
856 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
857 * Look for an ending '/' or ':' and terminate
858 * Look for login info preceded by '@'
865 char * extract(char const *url
);
868 static char Host
[SQUIDHOSTNAMELEN
];
869 void init(char const *);
870 void findHostStart();
871 void trimTrailingChars();
873 char const *hostStart
;
878 urlHostname(const char *url
)
880 return URLHostName().extract(url
);
883 char URLHostName::Host
[SQUIDHOSTNAMELEN
];
886 URLHostName::init(char const *aUrl
)
893 URLHostName::findHostStart()
895 if (NULL
== (hostStart
= strchr(url
, ':')))
900 while (*hostStart
!= '\0' && *hostStart
== '/')
903 if (*hostStart
== ']')
908 URLHostName::trimTrailingChars()
912 if ((t
= strchr(Host
, '/')))
915 if ((t
= strrchr(Host
, ':')))
918 if ((t
= strchr(Host
, ']')))
923 URLHostName::trimAuth()
927 if ((t
= strrchr(Host
, '@'))) {
929 memmove(Host
, t
, strlen(t
) + 1);
934 URLHostName::extract(char const *aUrl
)
939 if (hostStart
== NULL
)
942 xstrncpy(Host
, hostStart
, SQUIDHOSTNAMELEN
);
951 URL::URL(AnyP::UriScheme
const &aScheme
) :
953 hostIsNumeric_(false),