2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 /* DEBUG: section 23 URL Parsing */
15 #include "HttpRequest.h"
16 #include "parser/Tokenizer.h"
18 #include "SquidConfig.h"
19 #include "SquidString.h"
// Hostname character table used by AnyP::Uri::parse() when both
// Config.onoff.check_hostnames and Config.onoff.allow_underscore are set:
// the host is measured against it with strspn().
21 static const char valid_hostname_chars_u
[] =
22 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
23 "abcdefghijklmnopqrstuvwxyz"
// Hostname character table used by the same strspn() check when
// Config.onoff.allow_underscore is not set.
27 static const char valid_hostname_chars
[] =
28 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
29 "abcdefghijklmnopqrstuvwxyz"
34 /// Characters which are valid within a URI userinfo section
35 static const CharacterSet
&
39 * RFC 3986 section 3.2.1
41 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
42 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
43 * pct-encoded = "%" HEXDIG HEXDIG
44 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
46 static const auto userInfoValid
= CharacterSet("userinfo", ":-._~%!$&'()*+,;=") +
53 * Governed by RFC 3986 section 2.1
56 AnyP::Uri::Encode(const SBuf
&buf
, const CharacterSet
&ignore
)
61 Parser::Tokenizer
tk(buf
);
63 // optimization for the arguably common "no encoding necessary" case
64 if (tk
.prefix(goodSection
, ignore
) && tk
.atEnd())
68 output
.reserveSpace(buf
.length() * 3); // worst case: encode all chars
69 output
.append(goodSection
); // may be empty
72 // TODO: Add Tokenizer::parseOne(void).
73 const auto ch
= tk
.remaining()[0];
74 output
.appendf("%%%02X", static_cast<unsigned int>(ch
)); // TODO: Optimize using a table
77 if (tk
.prefix(goodSection
, ignore
))
78 output
.append(goodSection
);
87 static SBuf
star("*");
// Keeps one function-local static SBuf holding the single-character path "/".
// NOTE(review): presumably that object is what callers receive; the return
// statement is not part of the visible lines.
92 AnyP::Uri::SlashPath()
94 static SBuf
slash("/");
// Sets the host of this URI from the C string src.
// src is first handed to hostAddr_.fromHost(). If hostAddr_ still reports
// isAnyAddr() afterwards, src is copied into host_ (bounded by sizeof(host_))
// and the numeric-host flag is cleared.
99 AnyP::Uri::host(const char *src
)
101 hostAddr_
.fromHost(src
);
102 if (hostAddr_
.isAnyAddr()) {
103 xstrncpy(host_
, src
, sizeof(host_
));
104 hostIsNumeric_
= false;
// Other branch: host_ is filled with hostAddr_'s own textual form instead of src.
106 hostAddr_
.toHostStr(host_
, sizeof(host_
));
107 debugs(23, 3, "given IP: " << hostAddr_
);
// For a numeric host, returns an SBuf built from the text that
// hostIP().toHostStr() writes; the result for a non-numeric host is produced
// outside the visible lines.
114 AnyP::Uri::hostOrIp() const
116 if (hostIsNumeric()) {
// The text is staged in a function-local static buffer of MAX_IPSTRLEN chars;
// toHostStr() reports how many characters it wrote.
117 static char ip
[MAX_IPSTRLEN
];
118 const auto hostStrLen
= hostIP().toHostStr(ip
, sizeof(ip
));
119 return SBuf(ip
, hostStrLen
);
// Path accessor. The visible condition singles out an empty path_ on the
// http and https schemes, the case the RFC 7230 note below says must default to "/".
125 AnyP::Uri::path() const
127 // RFC 3986 section 3.3 says path can be empty (path-abempty).
128 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
129 // at least when sending and using. We must still accept path-abempty as input.
130 if (path_
.isEmpty() && (scheme_
== AnyP::PROTO_HTTP
|| scheme_
== AnyP::PROTO_HTTPS
))
139 debugs(23, 5, "urlInitialize: Initializing...");
140 /* this ensures that the number of protocol strings is the same as
141 * the enum slots allocated because the last enum is always 'MAX'.
143 assert(strcmp(AnyP::ProtocolType_str
[AnyP::PROTO_MAX
], "MAX") == 0);
145 * These test that our matchDomainName() function works the
146 * way we expect it to.
148 assert(0 == matchDomainName("foo.com", "foo.com"));
149 assert(0 == matchDomainName(".foo.com", "foo.com"));
150 assert(0 == matchDomainName("foo.com", ".foo.com"));
151 assert(0 == matchDomainName(".foo.com", ".foo.com"));
152 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
153 assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
154 assert(0 != matchDomainName("x.foo.com", "foo.com"));
155 assert(0 != matchDomainName("foo.com", "x.foo.com"));
156 assert(0 != matchDomainName("bar.com", "foo.com"));
157 assert(0 != matchDomainName(".bar.com", "foo.com"));
158 assert(0 != matchDomainName(".bar.com", ".foo.com"));
159 assert(0 != matchDomainName("bar.com", ".foo.com"));
160 assert(0 < matchDomainName("zzz.com", "foo.com"));
161 assert(0 > matchDomainName("aaa.com", "foo.com"));
162 assert(0 == matchDomainName("FOO.com", "foo.COM"));
163 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
164 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
165 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
167 assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains
));
168 assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
169 assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
170 assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains
));
172 assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards
));
173 assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards
));
174 assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards
));
175 assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards
));
181 * Extract the URI scheme and ':' delimiter from the given input buffer.
183 * Schemes up to 16 characters are accepted.
185 * Governed by RFC 3986 section 3.1
// Parses the leading scheme of a URI from tok and returns it as a UriScheme.
// A scheme is accepted when at most 16 characters from schemeChars are read,
// the ':' delimiter follows, and the first character is ALPHA.
// Throws TextException("invalid URI scheme") on the failure path visible below.
187 static AnyP::UriScheme
188 uriParseScheme(Parser::Tokenizer
&tok
)
191 * RFC 3986 section 3.1 paragraph 2:
193 * Scheme names consist of a sequence of characters beginning with a
194 * letter and followed by any combination of letters, digits, plus
195 * ("+"), period ("."), or hyphen ("-").
197 * The underscore ("_") required to match "cache_object://" squid
198 * special URI scheme.
// The underscore is only added to the accepted set when USE_HTTP_VIOLATIONS is enabled.
200 static const auto schemeChars
=
201 #if USE_HTTP_VIOLATIONS
202 CharacterSet("special", "_") +
204 CharacterSet("scheme", "+.-") + CharacterSet::ALPHA
+ CharacterSet::DIGIT
;
207 if (tok
.prefix(str
, schemeChars
, 16) && tok
.skip(':') && CharacterSet::ALPHA
[str
.at(0)]) {
// A scheme that maps to PROTO_UNKNOWN keeps its raw text as the image;
// a recognised protocol is stored with a null image.
208 const auto protocol
= AnyP::UriScheme::FindProtocolType(str
);
209 if (protocol
== AnyP::PROTO_UNKNOWN
)
210 return AnyP::UriScheme(protocol
, str
.c_str());
211 return AnyP::UriScheme(protocol
, nullptr);
214 throw TextException("invalid URI scheme", Here());
218 * Appends configured append_domain to hostname, assuming
219 * the given buffer is at least SQUIDHOSTNAMELEN bytes long,
220 * and that the host FQDN is not a 'dotless' TLD.
222 * \returns false if and only if there is not enough space to append
// Appends Config.appendDomain to host in place when host contains neither
// a '.' nor a ':' character and an append domain is configured.
225 urlAppendDomain(char *host
)
227 /* For IPv4 addresses check for a dot */
228 /* For IPv6 addresses also check for a colon */
229 if (Config
.appendDomain
&& !strchr(host
, '.') && !strchr(host
, ':')) {
// Lengths are held as uint64_t; the combined length must not exceed
// SQUIDHOSTNAMELEN - 1 characters.
230 const uint64_t dlen
= strlen(host
);
231 const uint64_t want
= dlen
+ Config
.appendDomainLen
;
232 if (want
> SQUIDHOSTNAMELEN
- 1) {
233 debugs(23, 2, "URL domain too large (" << dlen
<< " bytes)");
// strncat() is capped at the room left after the current host text.
236 strncat(host
, Config
.appendDomain
, SQUIDHOSTNAMELEN
- dlen
- 1);
244 * It is assumed that the URL is complete -
245 * ie, the end of the string is the end of the URL. Don't pass a partial
246 * URL here as this routine doesn't have any way of knowing whether
247 * it is partial or not (ie, it handles the case of no trailing slash as
248 * being "end of host with implied path of /").
250 * method is used to switch parsers. If method is Http::METHOD_CONNECT,
251 * then rather than a URL a hostname:port is looked for.
// Splits rawUrl into scheme, login (userinfo), host, port and path, validates
// the pieces and stores them on this Uri. The method selects the parser:
// CONNECT input is read as host[:port]; other input starts with a scheme.
// Failures visible here are reported through debugs(); the last debugs() call
// prints CurrentException, so thrown errors (for example from uriParseScheme()
// or parseUrn()) appear to be caught inside this method - confirm against the full file.
254 AnyP::Uri::parse(const HttpRequestMethod
& method
, const SBuf
&rawUrl
)
// Scratch buffers for the pieces found below, each MAX_URL chars.
258 LOCAL_ARRAY(char, login
, MAX_URL
);
259 LOCAL_ARRAY(char, foundHost
, MAX_URL
);
260 LOCAL_ARRAY(char, urlpath
, MAX_URL
);
268 foundHost
[0] = urlpath
[0] = login
[0] = '\0';
// Refuse input whose length plus the configured append-domain length
// would exceed MAX_URL - 1.
270 if ((l
= rawUrl
.length()) + Config
.appendDomainLen
> (MAX_URL
- 1)) {
271 debugs(23, DBG_IMPORTANT
, MYNAME
<< "URL too large (" << l
<< " bytes)");
// The asterisk form of OPTIONS and TRACE is stored as an http URI on that
// scheme's default port.
275 if ((method
== Http::METHOD_OPTIONS
|| method
== Http::METHOD_TRACE
) &&
276 Asterisk().cmp(rawUrl
) == 0) {
277 // XXX: these methods might also occur in HTTPS traffic. Handle this better.
278 setScheme(AnyP::PROTO_HTTP
, nullptr);
279 port(getScheme().defaultPort());
284 Parser::Tokenizer
tok(rawUrl
);
285 AnyP::UriScheme scheme
;
287 if (method
== Http::METHOD_CONNECT
) {
289 * RFC 7230 section 5.3.3: authority-form = authority
290 * "excluding any userinfo and its "@" delimiter"
292 * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
294 * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
298 // XXX: use tokenizer
300 const char *url
= B
.c_str();
// Try the bracketed [host]:port form first, then the plain host:port form.
302 if (sscanf(url
, "[%[^]]]:%d", foundHost
, &foundPort
) < 1)
303 if (sscanf(url
, "%[^:]:%d", foundHost
, &foundPort
) < 1)
// Non-CONNECT input: the scheme comes first.
308 scheme
= uriParseScheme(tok
);
310 if (scheme
== AnyP::PROTO_NONE
)
311 return false; // invalid scheme
313 if (scheme
== AnyP::PROTO_URN
) {
314 parseUrn(tok
); // throws on any error
318 // URLs then have "//"
319 static const SBuf
doubleSlash("//");
320 if (!tok
.skip(doubleSlash
))
323 auto B
= tok
.remaining();
324 const char *url
= B
.c_str();
330 /* Then everything until first /; that's host (and port; which we'll look for here later) */
331 // bug 1881: If we don't get a "/" then we imply it was there
332 // bug 3074: We could just be given a "?" or "#". These also imply "/"
333 // bug 3233: whitespace is also a hostname delimiter.
334 for (dst
= foundHost
; i
< l
&& *src
!= '/' && *src
!= '?' && *src
!= '#' && *src
!= '\0' && !xisspace(*src
); ++i
, ++src
, ++dst
) {
339 * We can't check for "i >= l" here because we could be at the end of the line
340 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
341 * been -given- a valid URL and the path is just '/'.
347 // We are looking at path-abempty.
349 // path-empty, including the end of the `src` c-string cases
355 /* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
356 for (; i
< l
&& *src
!= '\r' && *src
!= '\n' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
360 /* We -could- be at the end of the buffer here */
365 foundPort
= scheme
.defaultPort(); // may be reset later
367 /* Is there any login information? (we should eventually parse it above) */
// The last '@' in the authority separates the login from the host.
368 t
= strrchr(foundHost
, '@');
370 strncpy((char *) login
, (char *) foundHost
, sizeof(login
)-1);
371 login
[sizeof(login
)-1] = '\0';
372 t
= strrchr(login
, '@');
374 strncpy((char *) foundHost
, t
+ 1, sizeof(foundHost
)-1);
375 foundHost
[sizeof(foundHost
)-1] = '\0';
376 // Bug 4498: URL-unescape the login info after extraction
377 rfc1738_unescape(login
);
380 /* Is there any host information? (we should eventually parse it above) */
381 if (*foundHost
== '[') {
382 /* strip any IPA brackets. valid under IPv6. */
384 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
387 l
= strlen(foundHost
);
389 for (; i
< l
&& *src
!= ']' && *src
!= '\0'; ++i
, ++src
, ++dst
) {
393 /* we moved in-place, so truncate the actual hostname found */
397 /* skip ahead to either start of port, or original EOS */
398 while (*dst
!= '\0' && *dst
!= ':')
// Unbracketed host: compare the last ':' with the first ':' to tell a lone
// port delimiter from an unbracketed IPv6 literal.
402 t
= strrchr(foundHost
, ':');
404 if (t
!= strchr(foundHost
,':') ) {
405 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
406 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
407 /* therefore we MUST accept the case where they are not bracketed at all. */
412 // Bug 3183 sanity check: If scheme is present, host must be too.
413 if (scheme
!= AnyP::PROTO_NONE
&& foundHost
[0] == '\0') {
414 debugs(23, DBG_IMPORTANT
, "SECURITY ALERT: Missing hostname in URL '" << url
<< "'. see access.log for details.");
418 if (t
&& *t
== ':') {
425 for (t
= foundHost
; *t
; ++t
)
428 if (stringHasWhitespace(foundHost
)) {
429 if (URI_WHITESPACE_STRIP
== Config
.uri_whitespace
) {
442 debugs(23, 3, "Split URL '" << rawUrl
<< "' into proto='" << scheme
.image() << "', host='" << foundHost
<< "', port='" << foundPort
<< "', path='" << urlpath
<< "'");
// Optional hostname character check: every character of foundHost must come
// from the table selected by Config.onoff.allow_underscore.
444 if (Config
.onoff
.check_hostnames
&&
445 strspn(foundHost
, Config
.onoff
.allow_underscore
? valid_hostname_chars_u
: valid_hostname_chars
) != strlen(foundHost
)) {
446 debugs(23, DBG_IMPORTANT
, MYNAME
<< "Illegal character in hostname '" << foundHost
<< "'");
450 if (!urlAppendDomain(foundHost
))
453 /* remove trailing dots from hostnames */
454 while ((l
= strlen(foundHost
)) > 0 && foundHost
[--l
] == '.')
457 /* reject duplicate or leading dots */
458 if (strstr(foundHost
, "..") || *foundHost
== '.') {
459 debugs(23, DBG_IMPORTANT
, MYNAME
<< "Illegal hostname '" << foundHost
<< "'");
// Ports outside 1..65535 are reported as invalid.
463 if (foundPort
< 1 || foundPort
> 65535) {
464 debugs(23, 3, "Invalid port '" << foundPort
<< "'");
// Whitespace in the path is handled according to Config.uri_whitespace.
468 if (stringHasWhitespace(urlpath
)) {
469 debugs(23, 2, "URI has whitespace: {" << rawUrl
<< "}");
471 switch (Config
.uri_whitespace
) {
473 case URI_WHITESPACE_DENY
:
476 case URI_WHITESPACE_ALLOW
:
479 case URI_WHITESPACE_ENCODE
:
480 t
= rfc1738_escape_unescaped(urlpath
);
481 xstrncpy(urlpath
, t
, MAX_URL
);
// CHOP: cut the path at its first w_space character.
484 case URI_WHITESPACE_CHOP
:
485 *(urlpath
+ strcspn(urlpath
, w_space
)) = '\0';
488 case URI_WHITESPACE_STRIP
:
505 userInfo(SBuf(login
));
510 debugs(23, 2, "error: " << CurrentException
<< " " << Raw("rawUrl", rawUrl
.rawContent(), rawUrl
.length()));
516 * Governed by RFC 8141 section 2:
518 * assigned-name = "urn" ":" NID ":" NSS
519 * NID = (alphanum) 0*30(ldh) (alphanum)
520 * ldh = alphanum / "-"
521 * NSS = pchar *(pchar / "/")
523 * RFC 3986 Appendix D.2 defines (as deprecated):
525 * alphanum = ALPHA / DIGIT
527 * Notice that NID is exactly 2-32 characters in length.
// Parses the NID of a URN from tok, then stores everything that remains in
// tok as the path and sets the scheme to PROTO_URN.
// Throws TextException when a visible check on the NID fails (not found,
// too long or missing ':', shorter than 2 characters, or not alphanumeric at
// either end).
530 AnyP::Uri::parseUrn(Parser::Tokenizer
&tok
)
532 static const auto nidChars
= CharacterSet("NID","-") + CharacterSet::ALPHA
+ CharacterSet::DIGIT
;
533 static const auto alphanum
= (CharacterSet::ALPHA
+ CharacterSet::DIGIT
).rename("alphanum");
// Read at most 32 characters drawn from nidChars.
535 if (!tok
.prefix(nid
, nidChars
, 32))
536 throw TextException("NID not found", Here());
539 throw TextException("NID too long or missing ':' delimiter", Here());
541 if (nid
.length() < 2)
542 throw TextException("NID too short", Here());
// '-' is allowed inside the NID but not as its first or last character.
544 if (!alphanum
[*nid
.begin()])
545 throw TextException("NID prefix is not alphanumeric", Here());
547 if (!alphanum
[*nid
.rbegin()])
548 throw TextException("NID suffix is not alphanumeric", Here());
550 setScheme(AnyP::PROTO_URN
, nullptr);
552 // TODO validate path characters
553 path(tok
.remaining());
554 debugs(23, 3, "Split URI into proto=urn, nid=" << nid
<< ", " << Raw("path",path().rawContent(),path().length()));
561 authorityHttp_
.clear();
562 authorityWithPort_
.clear();
// Returns the authority text of this URI, building and caching two renderings
// on first use (detected by authorityHttp_ being empty):
//   authorityWithPort_ - host followed by ":port", always;
//   authorityHttp_     - host alone, or host:port when the port differs from
//                        the scheme's default port.
// requirePort selects authorityWithPort_; otherwise authorityHttp_ is returned.
566 AnyP::Uri::authority(bool requirePort
) const
568 if (authorityHttp_
.isEmpty()) {
570 // both formats contain Host/IP
571 authorityWithPort_
.append(host());
572 authorityHttp_
= authorityWithPort_
;
574 // authorityForm_ only has :port if it is non-default
575 authorityWithPort_
.appendf(":%u",port());
576 if (port() != getScheme().defaultPort())
577 authorityHttp_
= authorityWithPort_
;
580 return requirePort
? authorityWithPort_
: authorityHttp_
;
// Builds the absolute form of this URI into the absolute_ cache the first
// time it is needed (absolute_ empty): scheme image, ":", then for non-URN
// schemes "//", optional userinfo and the authority, and finally the path.
584 AnyP::Uri::absolute() const
586 if (absolute_
.isEmpty()) {
587 // TODO: most URL will be much shorter, avoid allocating this much
588 absolute_
.reserveCapacity(MAX_URL
);
590 absolute_
.append(getScheme().image());
591 absolute_
.append(":",1);
592 if (getScheme() != AnyP::PROTO_URN
) {
593 absolute_
.append("//", 2);
// Userinfo is emitted only for the FTP and unknown schemes.
594 const bool allowUserInfo
= getScheme() == AnyP::PROTO_FTP
||
595 getScheme() == AnyP::PROTO_UNKNOWN
;
597 if (allowUserInfo
&& !userInfo().isEmpty()) {
// Characters in uiChars pass through Encode() unchanged; all others are percent-encoded.
598 static const CharacterSet uiChars
= CharacterSet(UserInfoChars())
600 .rename("userinfo-reserved");
601 absolute_
.append(Encode(userInfo(), uiChars
));
602 absolute_
.append("@", 1);
604 absolute_
.append(authority());
606 absolute_
.append(host());
607 absolute_
.append(":", 1);
609 absolute_
.append(path()); // TODO: Encode each URI subcomponent in path_ as needed.
615 /* XXX: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(), but it elides the query-string
616 * after copying it in the first place! It would be less code to merge the two with a flag parameter
617 * and never copy the query-string part in the first place.
// Produces a cleaned copy of url in a LOCAL_ARRAY buffer of MAX_URL chars:
// the text is copied with snprintf(), the query string is located when
// Config.onoff.strip_query_terms applies, and the result is escaped with
// rfc1738_escape_unescaped() if it contains control characters.
620 urlCanonicalCleanWithoutRequest(const SBuf
&url
, const HttpRequestMethod
&method
, const AnyP::UriScheme
&scheme
)
622 LOCAL_ARRAY(char, buf
, MAX_URL
);
// Explicit terminator in the last byte of buf after the formatted copy.
624 snprintf(buf
, sizeof(buf
), SQUIDSBUFPH
, SQUIDSBUFPRINT(url
));
625 buf
[sizeof(buf
)-1] = '\0';
627 // URN, CONNECT method, and non-stripped URIs can go straight out
628 if (Config
.onoff
.strip_query_terms
&& !(method
== Http::METHOD_CONNECT
|| scheme
== AnyP::PROTO_URN
)) {
629 // strip anything AFTER a question-mark
630 // leaving the '?' in place
631 if (auto t
= strchr(buf
, '?')) {
636 if (stringHasCntl(buf
))
637 xstrncpy(buf
, rfc1738_escape_unescaped(buf
), MAX_URL
);
643 * Yet another alternative to urlCanonical.
644 * This one adds the https:// parts to Http::METHOD_CONNECT URL
645 * for use in error page outputs.
646 * Luckily we can leverage the others instead of duplicating.
// For a CONNECT request to port 443, formats "https://<host>/*" into a
// LOCAL_ARRAY buffer of MAX_URL chars; every other request falls through to
// request->canonicalCleanUrl().
649 urlCanonicalFakeHttps(const HttpRequest
* request
)
651 LOCAL_ARRAY(char, buf
, MAX_URL
);
653 // method CONNECT and port HTTPS
654 if (request
->method
== Http::METHOD_CONNECT
&& request
->url
.port() == 443) {
655 snprintf(buf
, MAX_URL
, "https://%s/*", request
->url
.host());
659 // else do the normal complete canonical thing.
660 return request
->canonicalCleanUrl();
664 * Test if a URL is a relative reference.
666 * Governed by RFC 3986 section 4.2
668 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
670 * relative-part = "//" authority path-abempty
// Decides whether the C string url is a relative reference.
// The visible return statements give: false for "no URL" and for a ':' found
// in the first path segment; true for an empty string, for text starting
// with "/" or "//", and for everything else.
676 urlIsRelative(const char *url
)
679 return false; // no URL
682 * RFC 3986 section 5.2.3
684 * path = path-abempty ; begins with "/" or is empty
685 * / path-absolute ; begins with "/" but not "//"
686 * / path-noscheme ; begins with a non-colon segment
687 * / path-rootless ; begins with a segment
688 * / path-empty ; zero characters
692 return true; // path-empty
695 // RFC 3986 section 5.2.3
696 // path-absolute ; begins with "/" but not "//"
698 return true; // network-path reference, aka. 'scheme-relative URI'
700 return true; // path-absolute, aka 'absolute-path reference'
// Scan only the first segment: stop at the first '/', '?', '#' or end of string.
703 for (const auto *p
= url
; *p
!= '\0' && *p
!= '/' && *p
!= '?' && *p
!= '#'; ++p
) {
705 return false; // colon is forbidden in first segment
708 return true; // path-noscheme, path-abempty, path-rootless
// Merges the relative reference relUrl into path_: everything after the last
// '/' of the current path is replaced by relUrl, or the whole path becomes
// "/" followed by relUrl when path_ contains no '/'. URN URIs are excluded.
712 AnyP::Uri::addRelativePath(const char *relUrl
)
714 // URN cannot be merged
715 if (getScheme() == AnyP::PROTO_URN
)
718 // TODO: Handle . and .. segment normalization
720 const auto lastSlashPos
= path_
.rfind('/');
721 // TODO: To optimize and simplify, add and use SBuf::replace().
722 const auto relUrlLength
= strlen(relUrl
);
723 if (lastSlashPos
== SBuf::npos
) {
724 // start replacing the whole path
725 path_
.reserveCapacity(1 + relUrlLength
);
726 path_
.assign("/", 1);
728 // start replacing just the last segment
729 path_
.reserveCapacity(lastSlashPos
+ 1 + relUrlLength
);
// Keep the prefix up to and including the last '/'.
730 path_
.chop(0, lastSlashPos
+1);
732 path_
.append(relUrl
, relUrlLength
);
// Compares host name h with domain name d, walking both strings from their
// last character towards the first and ignoring case (xtolower).
// Returns 0 for a match. The final return yields the difference between the
// first pair of differing lowercased characters, which gives callers an ordering.
// flags: mdnRejectSubsubDomains rejects hosts more than one label below d;
//        mdnHonorWildcards lets an h of the form "*.foo.com" match.
736 matchDomainName(const char *h
, const char *d
, MatchDomainNameFlags flags
)
// Remember whether h was given with a leading dot; used by the sub-sub-domain check below.
741 const bool hostIncludesSubdomains
= (*h
== '.');
753 * Start at the ends of the two strings and work towards the
756 while (xtolower(h
[--hl
]) == xtolower(d
[--dl
])) {
757 if (hl
== 0 && dl
== 0) {
759 * We made it all the way to the beginning of both
760 * strings without finding any difference.
767 * The host string is shorter than the domain string.
768 * There is only one case when this can be a match.
769 * If the domain is just one character longer, and if
770 * that character is a leading '.' then we call it a
774 if (1 == dl
&& '.' == d
[0])
782 * The domain string is shorter than the host string.
783 * This is a match only if the first domain character
788 if (flags
& mdnRejectSubsubDomains
) {
789 // Check for sub-sub domain and reject
// Walk left through the rest of h looking for one more '.'.
790 while(--hl
>= 0 && h
[hl
] != '.');
792 // No sub-sub domain found, but reject if there is a
793 // leading dot in given host string (which is removed
794 // before the check is started).
795 return hostIncludesSubdomains
? 1 : 0;
797 return 1; // sub-sub domain, reject
806 * We found different characters in the same position (from the end).
809 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
810 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
811 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
812 if ((flags
& mdnHonorWildcards
) && h
[hl
] == '*' && h
[hl
+ 1] == '.')
816 * If one of those character is '.' then its special. In order
817 * for splay tree sorting to work properly, "x-foo.com" must
818 * be greater than ".foo.com" even though '-' is less than '.'.
826 return (xtolower(h
[hl
]) - xtolower(d
[dl
]));
830 * return true if we can serve requests for this method.
// Checks whether the method of request r can be served for the scheme of
// r->url. CONNECT, OPTIONS, TRACE and PURGE are examined first, independent
// of the scheme; the remaining methods are judged per scheme in the switch.
833 urlCheckRequest(const HttpRequest
* r
)
835 /* protocol "independent" methods
837 * actually these methods are specific to HTTP:
838 * they are methods we receive on our HTTP port,
839 * and if we had a FTP listener would not be relevant
842 * So, we should delegate them to HTTP. The problem is that we
843 * do not have a default protocol from the client side of HTTP.
846 if (r
->method
== Http::METHOD_CONNECT
)
849 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
850 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
// Accepted when Max-Forwards is 0 or the path is not the asterisk form.
851 if (r
->method
== Http::METHOD_OPTIONS
|| r
->method
== Http::METHOD_TRACE
)
852 return (r
->header
.getInt64(Http::HdrType::MAX_FORWARDS
) == 0 || r
->url
.path() != AnyP::Uri::Asterisk());
854 if (r
->method
== Http::METHOD_PURGE
)
857 /* does method match the protocol? */
858 switch (r
->url
.getScheme()) {
860 case AnyP::PROTO_URN
:
861 case AnyP::PROTO_HTTP
:
862 case AnyP::PROTO_CACHE_OBJECT
:
// FTP: the test covers PUT, GET and HEAD.
865 case AnyP::PROTO_FTP
:
866 if (r
->method
== Http::METHOD_PUT
||
867 r
->method
== Http::METHOD_GET
||
868 r
->method
== Http::METHOD_HEAD
)
// WAIS and WHOIS: the test covers GET and HEAD.
872 case AnyP::PROTO_WAIS
:
873 case AnyP::PROTO_WHOIS
:
874 if (r
->method
== Http::METHOD_GET
||
875 r
->method
== Http::METHOD_HEAD
)
// HTTPS handling depends on whether a TLS library (OpenSSL or GnuTLS) is compiled in.
879 case AnyP::PROTO_HTTPS
:
880 #if USE_OPENSSL || USE_GNUTLS
884 * Squid can't originate an SSL connection, so it should
885 * never receive an "https:" URL. It should always be
// Constructs a Uri from the scheme aScheme; the visible initializer starts
// the object with hostIsNumeric_ cleared.
899 AnyP::Uri::Uri(AnyP::UriScheme
const &aScheme
) :
901 hostIsNumeric_(false),
907 // TODO: fix code duplication with AnyP::Uri::parse()
// Builds an escaped copy of the C string uri, with whitespace handled as
// selected by Config.uri_whitespace. In every visible branch the result is
// allocated with xstrndup() and limited to MAX_URL.
909 AnyP::Uri::cleanup(const char *uri
)
911 char *cleanedUri
= nullptr;
912 switch (Config
.uri_whitespace
) {
// ALLOW: escape unescaped characters but leave spaces alone.
913 case URI_WHITESPACE_ALLOW
: {
914 const auto flags
= RFC1738_ESCAPE_NOSPACE
| RFC1738_ESCAPE_UNESCAPED
;
915 cleanedUri
= xstrndup(rfc1738_do_escape(uri
, flags
), MAX_URL
);
// ENCODE: escape unescaped characters, no space exemption.
919 case URI_WHITESPACE_ENCODE
:
920 cleanedUri
= xstrndup(rfc1738_do_escape(uri
, RFC1738_ESCAPE_UNESCAPED
), MAX_URL
);
// CHOP: keep only the text before the first w_space character, then escape it.
923 case URI_WHITESPACE_CHOP
: {
924 const auto pos
= strcspn(uri
, w_space
);
925 char *choppedUri
= nullptr;
926 if (pos
< strlen(uri
))
927 choppedUri
= xstrndup(uri
, pos
+ 1);
928 cleanedUri
= xstrndup(rfc1738_do_escape(choppedUri
? choppedUri
: uri
,
929 RFC1738_ESCAPE_UNESCAPED
), MAX_URL
);
// NOTE(review): pos is an offset into the unescaped uri, yet it is applied to
// the escaped copy; confirm this is intended when escaping lengthens the text.
// NOTE(review): the release of choppedUri is not visible in these lines - confirm.
930 cleanedUri
[pos
] = '\0';
935 case URI_WHITESPACE_DENY
:
936 case URI_WHITESPACE_STRIP
:
938 // TODO: avoid duplication with urlParse()
// Working buffer sized to hold all of uri plus its terminator.
940 char *tmp_uri
= static_cast<char*>(xmalloc(strlen(uri
) + 1));
951 cleanedUri
= xstrndup(rfc1738_escape_unescaped(tmp_uri
), MAX_URL
);