2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 23 URL Parsing */
14 #include "HttpRequest.h"
15 #include "parser/Tokenizer.h"
17 #include "SquidConfig.h"
18 #include "SquidString.h"
// Hostname character whitelist selected by AnyP::Uri::parse() when
// Config.onoff.allow_underscore is set (see the strspn() check there).
// NOTE(review): only the two letter rows are visible in this listing; the
// remaining rows and the terminating semicolon are not shown.
20 static const char valid_hostname_chars_u
[] =
21 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
22 "abcdefghijklmnopqrstuvwxyz"
// Hostname character whitelist selected by AnyP::Uri::parse() when
// Config.onoff.allow_underscore is not set.
// NOTE(review): as above, only the letter rows are visible here.
26 static const char valid_hostname_chars
[] =
27 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
28 "abcdefghijklmnopqrstuvwxyz"
// Static SBuf holding the one-character asterisk request target.
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably this is the body of AnyP::Uri::Asterisk(), which parse() and
// urlCheckRequest() call -- confirm.
36 static SBuf
star("*");
// Defines a static SBuf initialised to a single slash.
// NOTE(review): the return type and the return statement are not visible in
// this listing; presumably the static below is what gets returned -- confirm.
41 AnyP::Uri::SlashPath()
43 static SBuf
slash("/");
// Stores the host part of this URI from the C string src.
// Visible behavior: when hostAddr_ reports isAnyAddr(), src is copied into
// host_ (bounded by sizeof(host_)) and hostIsNumeric_ is cleared.
// The statements numbered 56-57 render hostAddr_ into host_ via toHostStr()
// and log the address; presumably they form the other branch -- confirm.
// NOTE(review): the statement that gives hostAddr_ its value and the tail of
// the function are not visible in this listing.
48 AnyP::Uri::host(const char *src
)
52 if (hostAddr_
.isAnyAddr()) {
53 xstrncpy(host_
, src
, sizeof(host_
));
54 hostIsNumeric_
= false;
56 hostAddr_
.toHostStr(host_
, sizeof(host_
));
57 debugs(23, 3, "given IP: " << hostAddr_
);
// Returns an SBuf built from the textual form of hostIP().
// The text is first rendered by toStr() into a function-local static buffer
// of MAX_IPSTRLEN bytes, then wrapped in the returned SBuf.
// NOTE(review): the name suggests a plain-hostname alternative; any such
// branch is not visible in this listing.
64 AnyP::Uri::hostOrIp() const
66 static char ip
[MAX_IPSTRLEN
];
68 return SBuf(hostIP().toStr(ip
, sizeof(ip
)));
// Getter for the path component of this URI.
// The visible condition detects an empty path_ combined with an http or https
// scheme, which is the case the RFC notes below say must default to a slash.
// NOTE(review): the statements executed for each outcome of the condition are
// not visible in this listing.
74 AnyP::Uri::path() const
76 // RFC 3986 section 3.3 says path can be empty (path-abempty).
77 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
78 // at least when sending and using. We must still accept path-abempty as input.
79 if (path_
.isEmpty() && (scheme_
== AnyP::PROTO_HTTP
|| scheme_
== AnyP::PROTO_HTTPS
))
// Start-up self-checks for URL handling; the log text below names the
// routine urlInitialize (its header line is not visible in this listing).
// It asserts that the protocol string table ends with the MAX entry and then
// exercises matchDomainName() against a table of expected results:
// 0 means the host matches the domain, non-zero means it does not, and the
// sign is checked where ordering matters.
88 debugs(23, 5, "urlInitialize: Initializing...");
89 /* this ensures that the number of protocol strings is the same as
90 * the enum slots allocated because the last enum is always 'MAX'.
92 assert(strcmp(AnyP::ProtocolType_str
[AnyP::PROTO_MAX
], "MAX") == 0);
94 * These test that our matchDomainName() function works the
95 * way we expect it to.
// Default flags: a leading dot on the domain accepts subdomains of any depth.
97 assert(0 == matchDomainName("foo.com", "foo.com"));
98 assert(0 == matchDomainName(".foo.com", "foo.com"));
99 assert(0 == matchDomainName("foo.com", ".foo.com"));
100 assert(0 == matchDomainName(".foo.com", ".foo.com"));
101 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
102 assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
103 assert(0 != matchDomainName("x.foo.com", "foo.com"));
104 assert(0 != matchDomainName("foo.com", "x.foo.com"));
105 assert(0 != matchDomainName("bar.com", "foo.com"));
106 assert(0 != matchDomainName(".bar.com", "foo.com"));
107 assert(0 != matchDomainName(".bar.com", ".foo.com"));
108 assert(0 != matchDomainName("bar.com", ".foo.com"));
// Ordering checks: the sign of a mismatch result is significant.
109 assert(0 < matchDomainName("zzz.com", "foo.com"));
110 assert(0 > matchDomainName("aaa.com", "foo.com"));
111 assert(0 == matchDomainName("FOO.com", "foo.COM"));
112 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
113 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
114 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
// With mdnRejectSubsubDomains only one extra label is accepted.
116 assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains
));
117 assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
118 assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
119 assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
// With mdnHonorWildcards a leading asterisk label in the host is a wildcard.
121 assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards
));
122 assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards
));
123 assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards
));
124 assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards
));
130 * Extract the URI scheme and ':' delimiter from the given input buffer.
132 * Schemes up to 16 characters are accepted.
134 * Governed by RFC 3986 section 3.1
// Consumes the scheme and its colon from tok and returns the matching
// AnyP::UriScheme; throws TextException when no valid scheme is found.
136 static AnyP::UriScheme
137 uriParseScheme(Parser::Tokenizer
&tok
)
140 * RFC 3986 section 3.1 paragraph 2:
142 * Scheme names consist of a sequence of characters beginning with a
143 * letter and followed by any combination of letters, digits, plus
144 * ("+"), period ("."), or hyphen ("-").
146 * The underscore ("_") required to match "cache_object://" squid
147 * special URI scheme.
// The underscore is only added to the accepted set under USE_HTTP_VIOLATIONS.
149 static const auto schemeChars
=
150 #if USE_HTTP_VIOLATIONS
151 CharacterSet("special", "_") +
153 CharacterSet("scheme", "+.-") + CharacterSet::ALPHA
+ CharacterSet::DIGIT
;
// Accept at most 16 scheme characters, require the colon, and require the
// first character to be a letter.
// NOTE(review): the declaration of str is not visible in this listing.
156 if (tok
.prefix(str
, schemeChars
, 16) && tok
.skip(':') && CharacterSet::ALPHA
[str
.at(0)]) {
157 const auto protocol
= AnyP::UriScheme::FindProtocolType(str
);
// Unknown protocols keep their textual image; known ones pass nullptr.
158 if (protocol
== AnyP::PROTO_UNKNOWN
)
159 return AnyP::UriScheme(protocol
, str
.c_str());
160 return AnyP::UriScheme(protocol
, nullptr);
163 throw TextException("invalid URI scheme", Here());
167 * Appends configured append_domain to hostname, assuming
168 * the given buffer is at least SQUIDHOSTNAMELEN bytes long,
169 * and that the host FQDN is not a 'dotless' TLD.
171 * \returns false if and only if there is not enough space to append
// The append is attempted only when Config.appendDomain is set and host
// contains neither a dot nor a colon.
// NOTE(review): the return type and the return statements are not visible in
// this listing.
174 urlAppendDomain(char *host
)
176 /* For IPv4 addresses check for a dot */
177 /* For IPv6 addresses also check for a colon */
178 if (Config
.appendDomain
&& !strchr(host
, '.') && !strchr(host
, ':')) {
179 const uint64_t dlen
= strlen(host
);
180 const uint64_t want
= dlen
+ Config
.appendDomainLen
;
// want is the combined length; it must leave room for the terminator.
// NOTE(review): the message prints dlen (current host length), not want.
181 if (want
> SQUIDHOSTNAMELEN
- 1) {
182 debugs(23, 2, "URL domain too large (" << dlen
<< " bytes)");
// strncat() is bounded by the space left after the existing host text.
185 strncat(host
, Config
.appendDomain
, SQUIDHOSTNAMELEN
- dlen
- 1);
193 * It is assumed that the URL is complete -
194 * ie, the end of the string is the end of the URL. Don't pass a partial
195 * URL here as this routine doesn't have any way of knowing whether
196 * it is partial or not (ie, it handles the case of no trailing slash as
197 * being "end of host with implied path of /".
199 * method is used to switch parsers. If method is Http::METHOD_CONNECT,
200 * then rather than a URL a hostname:port is looked for.
// Parses rawUrl into this Uri. Stages visible in this listing, in order:
//  - length check against MAX_URL (including Config.appendDomainLen);
//  - asterisk target for OPTIONS and TRACE;
//  - CONNECT: host and optional port scanned with sscanf();
//  - otherwise: scheme via uriParseScheme(), URNs handed to parseUrn(),
//    then a hand-written split into host and path;
//  - login (userinfo) extraction, IPv6 bracket stripping, port lookup;
//  - hostname, port and whitespace validation;
//  - errors are logged through CurrentException.
// NOTE(review): many statements (declarations, returns, loop bodies, the
// final field assignments) are not visible in this listing; the notes below
// describe only what the visible lines show.
203 AnyP::Uri::parse(const HttpRequestMethod
& method
, const SBuf
&rawUrl
)
// Work buffers, each MAX_URL bytes.
207 LOCAL_ARRAY(char, login
, MAX_URL
);
208 LOCAL_ARRAY(char, foundHost
, MAX_URL
);
209 LOCAL_ARRAY(char, urlpath
, MAX_URL
);
217 foundHost
[0] = urlpath
[0] = login
[0] = '\0';
// Length guard: the raw URL plus a possible appended domain must fit in
// MAX_URL - 1 bytes. The fixed-size buffers above depend on this.
219 if ((l
= rawUrl
.length()) + Config
.appendDomainLen
> (MAX_URL
- 1)) {
220 debugs(23, DBG_IMPORTANT
, MYNAME
<< "URL too large (" << l
<< " bytes)");
// Asterisk target: only for OPTIONS and TRACE; treated as plain HTTP with
// the scheme default port.
224 if ((method
== Http::METHOD_OPTIONS
|| method
== Http::METHOD_TRACE
) &&
225 Asterisk().cmp(rawUrl
) == 0) {
226 // XXX: these methods might also occur in HTTPS traffic. Handle this better.
227 setScheme(AnyP::PROTO_HTTP
, nullptr);
228 port(getScheme().defaultPort());
233 Parser::Tokenizer
tok(rawUrl
);
234 AnyP::UriScheme scheme
;
// CONNECT carries an authority-form target (host and optional port).
236 if (method
== Http::METHOD_CONNECT
) {
238 * RFC 7230 section 5.3.3: authority-form = authority
239 * "excluding any userinfo and its "@" delimiter"
241 * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
243 * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
247 // XXX: use tokenizer
249 const char *url
= B
.c_str();
// First try the bracketed IPv6 form, then the plain host:port form.
// NOTE(review): neither scanset has a field width; writing into foundHost
// is bounded only by the MAX_URL length guard near the top of the function.
251 if (sscanf(url
, "[%[^]]]:%d", foundHost
, &foundPort
) < 1)
252 if (sscanf(url
, "%[^:]:%d", foundHost
, &foundPort
) < 1)
// Non-CONNECT path: parse the scheme first (may throw TextException).
257 scheme
= uriParseScheme(tok
);
259 if (scheme
== AnyP::PROTO_NONE
)
260 return false; // invalid scheme
262 if (scheme
== AnyP::PROTO_URN
) {
263 parseUrn(tok
); // throws on any error
267 // URLs then have "//"
268 static const SBuf
doubleSlash("//");
269 if (!tok
.skip(doubleSlash
))
// The remainder is processed as a C string by the legacy scanning loops.
272 auto B
= tok
.remaining();
273 const char *url
= B
.c_str();
279 /* Then everything until first /; thats host (and port; which we'll look for here later) */
280 // bug 1881: If we don't get a "/" then we imply it was there
281 // bug 3074: We could just be given a "?" or "#". These also imply "/"
282 // bug 3233: whitespace is also a hostname delimiter.
283 for (dst
= foundHost
; i
< l
&& *src
!= '/' && *src
!= '?' && *src
!= '#' && *src
!= '\0' && !xisspace(*src
); ++i
, ++src
, ++dst
) {
288 * We can't check for "i >= l" here because we could be at the end of the line
289 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
290 * been -given- a valid URL and the path is just '/'.
296 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
297 if (*src
== '?' || *src
== '#' || *src
== '\0') {
303 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
304 for (; i
< l
&& *src
!= '\r' && *src
!= '\n' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
308 /* We -could- be at the end of the buffer here */
311 /* If the URL path is empty we set it to be "/" */
312 if (dst
== urlpath
) {
318 foundPort
= scheme
.defaultPort(); // may be reset later
// Userinfo: everything before the last '@' in the host text goes to login,
// everything after it stays as the host.
320 /* Is there any login information? (we should eventually parse it above) */
321 t
= strrchr(foundHost
, '@');
323 strncpy((char *) login
, (char *) foundHost
, sizeof(login
)-1);
324 login
[sizeof(login
)-1] = '\0';
325 t
= strrchr(login
, '@');
327 strncpy((char *) foundHost
, t
+ 1, sizeof(foundHost
)-1);
328 foundHost
[sizeof(foundHost
)-1] = '\0';
329 // Bug 4498: URL-unescape the login info after extraction
330 rfc1738_unescape(login
);
// Bracketed IPv6 literal: the address is moved in place without brackets.
333 /* Is there any host information? (we should eventually parse it above) */
334 if (*foundHost
== '[') {
335 /* strip any IPA brackets. valid under IPv6. */
337 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
340 l
= strlen(foundHost
);
342 for (; i
< l
&& *src
!= ']' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
346 /* we moved in-place, so truncate the actual hostname found */
350 /* skip ahead to either start of port, or original EOS */
351 while (*dst
!= '\0' && *dst
!= ':')
// Unbracketed host: the last colon is taken as the port separator unless
// there is more than one colon (an unbracketed IPv6 address).
355 t
= strrchr(foundHost
, ':');
357 if (t
!= strchr(foundHost
,':') ) {
358 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
359 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
360 /* therefore we MUST accept the case where they are not bracketed at all. */
365 // Bug 3183 sanity check: If scheme is present, host must be too.
366 if (scheme
!= AnyP::PROTO_NONE
&& foundHost
[0] == '\0') {
367 debugs(23, DBG_IMPORTANT
, "SECURITY ALERT: Missing hostname in URL '" << url
<< "'. see access.log for details.");
371 if (t
&& *t
== ':') {
378 for (t
= foundHost
; *t
; ++t
)
381 if (stringHasWhitespace(foundHost
)) {
382 if (URI_WHITESPACE_STRIP
== Config
.uri_whitespace
) {
395 debugs(23, 3, "Split URL '" << rawUrl
<< "' into proto='" << scheme
.image() << "', host='" << foundHost
<< "', port='" << foundPort
<< "', path='" << urlpath
<< "'");
// Optional hostname whitelist check: strspn() must cover the whole host.
397 if (Config
.onoff
.check_hostnames
&&
398 strspn(foundHost
, Config
.onoff
.allow_underscore
? valid_hostname_chars_u
: valid_hostname_chars
) != strlen(foundHost
)) {
399 debugs(23, DBG_IMPORTANT
, MYNAME
<< "Illegal character in hostname '" << foundHost
<< "'");
403 if (!urlAppendDomain(foundHost
))
406 /* remove trailing dots from hostnames */
407 while ((l
= strlen(foundHost
)) > 0 && foundHost
[--l
] == '.')
410 /* reject duplicate or leading dots */
411 if (strstr(foundHost
, "..") || *foundHost
== '.') {
412 debugs(23, DBG_IMPORTANT
, MYNAME
<< "Illegal hostname '" << foundHost
<< "'");
// Port must be within 1..65535.
416 if (foundPort
< 1 || foundPort
> 65535) {
417 debugs(23, 3, "Invalid port '" << foundPort
<< "'");
421 #if HARDCODE_DENY_PORTS
422 /* These ports are filtered in the default squid.conf, but
423 * maybe someone wants them hardcoded... */
424 if (foundPort
== 7 || foundPort
== 9 || foundPort
== 19) {
425 debugs(23, DBG_CRITICAL
, MYNAME
<< "Deny access to port " << foundPort
);
// Whitespace in the path is handled according to Config.uri_whitespace.
// NOTE(review): the bodies of the DENY, ALLOW and STRIP cases are not
// visible in this listing.
430 if (stringHasWhitespace(urlpath
)) {
431 debugs(23, 2, "URI has whitespace: {" << rawUrl
<< "}");
433 switch (Config
.uri_whitespace
) {
435 case URI_WHITESPACE_DENY
:
438 case URI_WHITESPACE_ALLOW
:
441 case URI_WHITESPACE_ENCODE
:
442 t
= rfc1738_escape_unescaped(urlpath
);
443 xstrncpy(urlpath
, t
, MAX_URL
);
// CHOP truncates the path at the first whitespace character.
446 case URI_WHITESPACE_CHOP
:
447 *(urlpath
+ strcspn(urlpath
, w_space
)) = '\0';
450 case URI_WHITESPACE_STRIP
:
467 userInfo(SBuf(login
));
// Presumably inside a catch handler (not visible): logs the active exception
// together with the raw input.
472 debugs(23, 2, "error: " << CurrentException
<< " " << Raw("rawUrl", rawUrl
.rawContent(), rawUrl
.length()));
478 * Governed by RFC 8141 section 2:
480 * assigned-name = "urn" ":" NID ":" NSS
481 * NID = (alphanum) 0*30(ldh) (alphanum)
482 * ldh = alphanum / "-"
483 * NSS = pchar *(pchar / "/")
485 * RFC 3986 Appendix D.2 defines (as deprecated):
487 * alphanum = ALPHA / DIGIT
489 * Notice that NID is exactly 2-32 characters in length.
// Parses the part of a URN that follows the scheme: validates the NID and
// stores everything after it as the path. Throws TextException on any error.
// NOTE(review): the declaration of nid, the check guarding the
// missing-delimiter throw, and any statement storing the NID are not visible
// in this listing.
492 AnyP::Uri::parseUrn(Parser::Tokenizer
&tok
)
494 static const auto nidChars
= CharacterSet("NID","-") + CharacterSet::ALPHA
+ CharacterSet::DIGIT
;
495 static const auto alphanum
= (CharacterSet::ALPHA
+ CharacterSet::DIGIT
).rename("alphanum");
// Take at most 32 NID characters.
497 if (!tok
.prefix(nid
, nidChars
, 32))
498 throw TextException("NID not found", Here());
501 throw TextException("NID too long or missing ':' delimiter", Here());
503 if (nid
.length() < 2)
504 throw TextException("NID too short", Here());
// First and last NID characters must be alphanumeric (no hyphen at an edge).
506 if (!alphanum
[*nid
.begin()])
507 throw TextException("NID prefix is not alphanumeric", Here());
509 if (!alphanum
[*nid
.rbegin()])
510 throw TextException("NID suffix is not alphanumeric", Here());
512 setScheme(AnyP::PROTO_URN
, nullptr);
514 // TODO validate path characters
515 path(tok
.remaining());
516 debugs(23, 3, "Split URI into proto=urn, nid=" << nid
<< ", " << Raw("path",path().rawContent(),path().length()));
// Empties both cached authority strings so that authority() rebuilds them
// on its next call (it regenerates when authorityHttp_ is empty).
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably AnyP::Uri::touch() -- confirm. Other statements of that function,
// if any, are also not shown.
523 authorityHttp_
.clear();
524 authorityWithPort_
.clear();
// Returns the authority text for this URI, building and caching it on first
// use. Two cached forms exist: authorityWithPort_ always ends in a colon and
// the port; authorityHttp_ carries the port only when it differs from the
// scheme default. requirePort selects which one is returned.
528 AnyP::Uri::authority(bool requirePort
) const
// An empty authorityHttp_ means the cache needs (re)building.
530 if (authorityHttp_
.isEmpty()) {
532 // both formats contain Host/IP
533 authorityWithPort_
.append(host());
534 authorityHttp_
= authorityWithPort_
;
536 // authorityForm_ only has :port if it is non-default
537 authorityWithPort_
.appendf(":%u",port());
538 if (port() != getScheme().defaultPort())
539 authorityHttp_
= authorityWithPort_
;
542 return requirePort
? authorityWithPort_
: authorityHttp_
;
// Builds the absolute form of this URI into the cached member absolute_ when
// that cache is empty: scheme image, a colon, then either the double-slash
// authority form or (for URNs) the host text and a colon, then the path.
// NOTE(review): the return statement is not visible in this listing.
546 AnyP::Uri::absolute() const
548 if (absolute_
.isEmpty()) {
549 // TODO: most URL will be much shorter, avoid allocating this much
550 absolute_
.reserveCapacity(MAX_URL
);
552 absolute_
.append(getScheme().image());
553 absolute_
.append(":",1);
554 if (getScheme() != AnyP::PROTO_URN
) {
555 absolute_
.append("//", 2);
// Userinfo is emitted only for FTP and unknown schemes.
556 const bool allowUserInfo
= getScheme() == AnyP::PROTO_FTP
||
557 getScheme() == AnyP::PROTO_UNKNOWN
;
559 if (allowUserInfo
&& !userInfo().isEmpty()) {
560 absolute_
.append(userInfo());
561 absolute_
.append("@", 1);
563 absolute_
.append(authority());
// Presumably the URN branch (its else line is not visible): the host text
// followed by a colon -- confirm.
565 absolute_
.append(host());
566 absolute_
.append(":", 1);
568 absolute_
.append(path());
574 /** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
575 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
576 * and never copy the query-string part in the first place
// Produces a cleaned C-string copy of url in a LOCAL_ARRAY buffer of MAX_URL
// bytes: optionally strips query terms and escapes control characters.
// NOTE(review): the return type and return statement are not visible in this
// listing; presumably the buffer is returned -- confirm.
579 urlCanonicalCleanWithoutRequest(const SBuf
&url
, const HttpRequestMethod
&method
, const AnyP::UriScheme
&scheme
)
581 LOCAL_ARRAY(char, buf
, MAX_URL
);
// Copy (and possibly truncate) url into buf; termination is forced below.
583 snprintf(buf
, sizeof(buf
), SQUIDSBUFPH
, SQUIDSBUFPRINT(url
));
584 buf
[sizeof(buf
)-1] = '\0';
586 // URN, CONNECT method, and non-stripped URIs can go straight out
587 if (Config
.onoff
.strip_query_terms
&& !(method
== Http::METHOD_CONNECT
|| scheme
== AnyP::PROTO_URN
)) {
588 // strip anything AFTER a question-mark
589 // leaving the '?' in place
// NOTE(review): the statement that performs the truncation is not visible.
590 if (auto t
= strchr(buf
, '?')) {
// Escape control characters; the escaped text is copied back into buf.
595 if (stringHasCntl(buf
))
596 xstrncpy(buf
, rfc1738_escape_unescaped(buf
), MAX_URL
);
602 * Yet another alternative to urlCanonical.
603 * This one adds the https:// parts to Http::METHOD_CONNECT URL
604 * for use in error page outputs.
605 * Luckily we can leverage the others instead of duplicating.
// For a CONNECT request to port 443, formats an https URL for the request
// host followed by a wildcard path into a LOCAL_ARRAY buffer; every other
// request falls through to request->canonicalCleanUrl().
// NOTE(review): the return type and the return statement of the CONNECT
// branch are not visible in this listing.
608 urlCanonicalFakeHttps(const HttpRequest
* request
)
610 LOCAL_ARRAY(char, buf
, MAX_URL
);
612 // method CONNECT and port HTTPS
613 if (request
->method
== Http::METHOD_CONNECT
&& request
->url
.port() == 443) {
614 snprintf(buf
, MAX_URL
, "https://%s/*", request
->url
.host());
618 // else do the normal complete canonical thing.
619 return request
->canonicalCleanUrl();
623 * Test if a URL is relative.
625 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
626 * appear before a ':'.
// NOTE(review): the return type, the declaration of p, any guards on url and
// the return statements are not visible in this listing.
629 urlIsRelative(const char *url
)
// Empty-bodied loop: advances p to the first colon, the first slash, or the
// end of the string, whichever comes first.
640 for (p
= url
; *p
!= '\0' && *p
!= ':' && *p
!= '/'; ++p
);
649 * Convert a relative URL to an absolute URL using the context of a given
652 * It is assumed that you have already ensured that the URL is relative.
654 * If NULL is returned it is an indication that the method in use in the
655 * request does not distinguish between relative and absolute and you should
656 * use the url unchanged.
658 * If non-NULL is returned, it is up to the caller to free the resulting
659 * memory using safe_free().
// Builds the result in a freshly xmalloc()ed buffer of MAX_URL bytes:
// scheme, optional userinfo and authority come from req, the path part comes
// from relUrl (and, for non-rooted relUrl, from the directory of the request
// path).
// NOTE(review): the return type and the return statements are not visible in
// this listing.
662 urlMakeAbsolute(const HttpRequest
* req
, const char *relUrl
)
// CONNECT requests are special-cased before any allocation happens.
665 if (req
->method
.id() == Http::METHOD_CONNECT
) {
669 char *urlbuf
= (char *)xmalloc(MAX_URL
* sizeof(char));
671 if (req
->url
.getScheme() == AnyP::PROTO_URN
) {
672 // XXX: this is what the original code did, but it seems to break the
673 // intended behaviour of this function. It returns the stored URN path,
674 // not converting the given one into a URN...
675 snprintf(urlbuf
, MAX_URL
, SQUIDSBUFPH
, SQUIDSBUFPRINT(req
->url
.absolute()));
679 SBuf authorityForm
= req
->url
.authority(); // host[:port]
680 const SBuf
&scheme
= req
->url
.getScheme().image();
// NOTE(review): snprintf() returns the length the full text would need, which
// can be MAX_URL or more when the output was truncated. urllen is used below
// unclamped as an index into urlbuf and in MAX_URL - urllen - 1, which would
// wrap around for a size_t. No clamp is visible in this listing -- confirm.
681 size_t urllen
= snprintf(urlbuf
, MAX_URL
, SQUIDSBUFPH
"://" SQUIDSBUFPH
"%s" SQUIDSBUFPH
,
682 SQUIDSBUFPRINT(scheme
),
683 SQUIDSBUFPRINT(req
->url
.userInfo()),
684 !req
->url
.userInfo().isEmpty() ? "@" : "",
685 SQUIDSBUFPRINT(authorityForm
));
687 // if the first char is '/' assume its a relative path
688 // XXX: this breaks on scheme-relative URLs,
689 // but we should not see those outside ESI, and rarely there.
690 // XXX: also breaks on any URL containing a '/' in the query-string portion
691 if (relUrl
[0] == '/') {
692 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
// Non-rooted relUrl: resolve against the directory part of the request path.
694 SBuf path
= req
->url
.path();
695 SBuf::size_type lastSlashPos
= path
.rfind('/');
697 if (lastSlashPos
== SBuf::npos
) {
698 // replace the whole path with the given bit(s)
699 urlbuf
[urllen
] = '/';
701 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
703 // replace only the last (file?) segment with the given bit(s)
// Clamp the copied directory prefix to the space left in urlbuf.
705 if (lastSlashPos
> MAX_URL
- urllen
- 1) {
706 // XXX: crops bits in the middle of the combined URL.
707 lastSlashPos
= MAX_URL
- urllen
- 1;
709 SBufToCstring(&urlbuf
[urllen
], path
.substr(0,lastSlashPos
));
710 urllen
+= lastSlashPos
;
711 if (urllen
+ 1 < MAX_URL
) {
712 xstrncpy(&urlbuf
[urllen
], relUrl
, MAX_URL
- urllen
- 1);
// Compares host name h against domain name d, case-insensitively, walking
// both strings from their ends towards their beginnings.
// Result convention, as exercised by the asserts in the start-up self-check:
// 0 means h matches d; a non-zero value means no match, and its sign gives
// an ordering between the two names.
// flags may contain mdnRejectSubsubDomains and mdnHonorWildcards.
// NOTE(review): the return type, the declarations of hl and dl and several
// return statements are not visible in this listing.
721 matchDomainName(const char *h
, const char *d
, MatchDomainNameFlags flags
)
// Remember whether the host was given with a leading dot.
726 const bool hostIncludesSubdomains
= (*h
== '.');
738 * Start at the ends of the two strings and work towards the
741 while (xtolower(h
[--hl
]) == xtolower(d
[--dl
])) {
742 if (hl
== 0 && dl
== 0) {
744 * We made it all the way to the beginning of both
745 * strings without finding any difference.
752 * The host string is shorter than the domain string.
753 * There is only one case when this can be a match.
754 * If the domain is just one character longer, and if
755 * that character is a leading '.' then we call it a
759 if (1 == dl
&& '.' == d
[0])
767 * The domain string is shorter than the host string.
768 * This is a match only if the first domain character
773 if (flags
& mdnRejectSubsubDomains
) {
774 // Check for sub-sub domain and reject
// Empty-bodied loop: scans the rest of the host backwards for another dot.
775 while(--hl
>= 0 && h
[hl
] != '.');
777 // No sub-sub domain found, but reject if there is a
778 // leading dot in given host string (which is removed
779 // before the check is started).
780 return hostIncludesSubdomains
? 1 : 0;
782 return 1; // sub-sub domain, reject
791 * We found different characters in the same position (from the end).
794 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
795 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
796 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
797 if ((flags
& mdnHonorWildcards
) && h
[hl
] == '*' && h
[hl
+ 1] == '.')
801 * If one of those character is '.' then its special. In order
802 * for splay tree sorting to work properly, "x-foo.com" must
803 * be greater than ".foo.com" even though '-' is less than '.'.
// Fallback ordering: difference of the lower-cased mismatching characters.
811 return (xtolower(h
[hl
]) - xtolower(d
[dl
]));
815 * return true if we can serve requests for this method.
// Decides by request method first (CONNECT, OPTIONS, TRACE, PURGE), then by
// the URL scheme of the request.
// NOTE(review): the return type and most return statements are not visible
// in this listing; only the OPTIONS and TRACE result expression is shown.
818 urlCheckRequest(const HttpRequest
* r
)
821 /* protocol "independent" methods
823 * actually these methods are specific to HTTP:
824 * they are methods we receive on our HTTP port,
825 * and if we had a FTP listener would not be relevant
828 * So, we should delegate them to HTTP. The problem is that we
829 * do not have a default protocol from the client side of HTTP.
832 if (r
->method
== Http::METHOD_CONNECT
)
835 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
836 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
// Accepted when Max-Forwards is 0 or the target is not the asterisk form.
837 if (r
->method
== Http::METHOD_OPTIONS
|| r
->method
== Http::METHOD_TRACE
)
838 return (r
->header
.getInt64(Http::HdrType::MAX_FORWARDS
) == 0 || r
->url
.path() != AnyP::Uri::Asterisk());
840 if (r
->method
== Http::METHOD_PURGE
)
843 /* does method match the protocol? */
// Per-scheme rules; the FTP case checks PUT and the GOPHER, WAIS and WHOIS
// cases check GET and HEAD. The outcome of each case is not visible here.
844 switch (r
->url
.getScheme()) {
846 case AnyP::PROTO_URN
:
848 case AnyP::PROTO_HTTP
:
850 case AnyP::PROTO_CACHE_OBJECT
:
854 case AnyP::PROTO_FTP
:
856 if (r
->method
== Http::METHOD_PUT
)
859 case AnyP::PROTO_GOPHER
:
861 case AnyP::PROTO_WAIS
:
863 case AnyP::PROTO_WHOIS
:
864 if (r
->method
== Http::METHOD_GET
)
866 else if (r
->method
== Http::METHOD_HEAD
)
871 case AnyP::PROTO_HTTPS
:
878 * Squid can't originate an SSL connection, so it should
879 * never receive an "https:" URL. It should always be
894 * Quick-n-dirty host extraction from a URL. Steps:
896 * Skip any '/' after the colon
897 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
898 * Look for an ending '/' or ':' and terminate
899 * Look for login info preceeded by '@'
// Members of the helper class URLHostName (its class header line is not
// visible in this listing; the name comes from the member definitions that
// follow in the file).
// extract() is the entry point called by urlHostname().
906 char * extract(char const *url
);
// Shared static result buffer of SQUIDHOSTNAMELEN bytes; extract() copies
// the host text into it.
909 static char Host
[SQUIDHOSTNAMELEN
];
910 void init(char const *);
911 void findHostStart();
912 void trimTrailingChars();
// Cursor into the URL being examined; set by findHostStart().
914 char const *hostStart
;
// Convenience wrapper: runs URLHostName::extract() on url using a temporary
// URLHostName object and returns its result.
// NOTE(review): URLHostName declares a static Host buffer that extract()
// fills; presumably the returned pointer refers to it and is overwritten by
// the next call -- confirm.
919 urlHostname(const char *url
)
921 return URLHostName().extract(url
);
// Out-of-class definition of the static buffer declared in URLHostName.
924 char URLHostName::Host
[SQUIDHOSTNAMELEN
];
// Prepares the object for extracting a host from aUrl.
// NOTE(review): the body is not visible in this listing.
927 URLHostName::init(char const *aUrl
)
// Positions hostStart within url: it is first set to the first colon found
// by strchr(), then tested for slashes and for a closing bracket.
// NOTE(review): the statements executed by each test (early return, pointer
// increments) are not visible in this listing.
934 URLHostName::findHostStart()
// A URL without any colon yields a NULL hostStart.
936 if (NULL
== (hostStart
= strchr(url
, ':')))
941 while (*hostStart
!= '\0' && *hostStart
== '/')
944 if (*hostStart
== ']')
// Looks in Host for, in order, the first slash, the last colon and the first
// closing bracket.
// NOTE(review): the action taken at each position found and the declaration
// of t are not visible in this listing; per the step list on the class,
// presumably the string is terminated there -- confirm.
949 URLHostName::trimTrailingChars()
953 if ((t
= strchr(Host
, '/')))
956 if ((t
= strrchr(Host
, ':')))
959 if ((t
= strchr(Host
, ']')))
// Removes login information from Host: finds the last at-sign and shifts the
// text starting at t (terminator included) to the front of the buffer.
// memmove() is used because source and destination overlap.
// NOTE(review): a line between the test and the memmove() is not visible in
// this listing; presumably it advances t past the at-sign -- confirm.
964 URLHostName::trimAuth()
968 if ((t
= strrchr(Host
, '@'))) {
970 memmove(Host
, t
, strlen(t
) + 1);
// Entry point of the helper: copies at most SQUIDHOSTNAMELEN bytes starting
// at hostStart into the static Host buffer.
// NOTE(review): the calls that set hostStart and trim the result, and the
// return statements, are not visible in this listing.
975 URLHostName::extract(char const *aUrl
)
// A NULL hostStart means no host could be located.
980 if (hostStart
== NULL
)
983 xstrncpy(Host
, hostStart
, SQUIDHOSTNAMELEN
);
// Constructs a Uri from a scheme; hostIsNumeric_ starts out false.
// NOTE(review): the remaining member initialisers and the constructor body
// are not visible in this listing.
992 AnyP::Uri::Uri(AnyP::UriScheme
const &aScheme
) :
994 hostIsNumeric_(false),
1000 // TODO: fix code duplication with AnyP::Uri::parse()
1002 AnyP::Uri::cleanup(const char *uri
)
1005 char *cleanedUri
= nullptr;
1006 switch (Config
.uri_whitespace
) {
1007 case URI_WHITESPACE_ALLOW
:
1008 flags
|= RFC1738_ESCAPE_NOSPACE
;
1009 // fall through to next case
1010 case URI_WHITESPACE_ENCODE
:
1011 flags
|= RFC1738_ESCAPE_UNESCAPED
;
1012 cleanedUri
= xstrndup(rfc1738_do_escape(uri
, flags
), MAX_URL
);
1015 case URI_WHITESPACE_CHOP
: {
1016 flags
|= RFC1738_ESCAPE_UNESCAPED
;
1017 const auto pos
= strcspn(uri
, w_space
);
1018 char *choppedUri
= nullptr;
1019 if (pos
< strlen(uri
))
1020 choppedUri
= xstrndup(uri
, pos
+ 1);
1021 cleanedUri
= xstrndup(rfc1738_do_escape(choppedUri
? choppedUri
: uri
, flags
), MAX_URL
);
1022 cleanedUri
[pos
] = '\0';
1027 case URI_WHITESPACE_DENY
:
1028 case URI_WHITESPACE_STRIP
:
1030 // TODO: avoid duplication with urlParse()
1032 char *tmp_uri
= static_cast<char*>(xmalloc(strlen(uri
) + 1));
1036 if (!xisspace(*t
)) {
1043 cleanedUri
= xstrndup(rfc1738_escape_unescaped(tmp_uri
), MAX_URL
);