From: Alex Rousskov Date: Wed, 28 Jun 2023 09:28:59 +0000 (+0000) Subject: Reject more CONNECT requests with malformed targets (#1253) X-Git-Tag: SQUID_7_0_1~415 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=963ff14337f7bea953658d99870a0dbd2e74a206;p=thirdparty%2Fsquid.git Reject more CONNECT requests with malformed targets (#1253) Squid silently ignored many syntax violations, interpreting a malformed target as a valid host:port address of some origin server and establishing a CONNECT tunnel with that origin server. While being "tolerant" probably did not compromise the Squid instance itself, some known attacks abuse the _difference_ in treatment of malformed requests. Rejecting malformed requests and closing the connection prevents many such attacks. Among other syntax violations, this change rejects bracketed pure IPv4 addresses and bracketed domain names in CONNECT targets. Bracketed IPv6 addresses that include an IPv4address suffix are still accepted (e.g., look for ABNF containing ls32 in RFC 3986, Section 3.2.2). CONNECT target hosts are no longer covered by uri_whitespace: CONNECT targets containing whitespace are now rejected with in ERR_INVALID_URL regardless of uri_whitespace and check_hostnames settings. With the exception of whitespaces in host names when uri_whitespace is set to "strip" (default), the uri_whitespace directive was already ignored for all request targets. For example, whitespaces in the port component were always handled without checking uri_whitespace. In CONNECT context, stripping whitespace is not only risky but probably is not needed in practice because user typos should not lead to spaces in CONNECT targets, and even if they do, virtually all legitimate use cases ought to fail during certificate validation and similar post-CONNECT checks. --- diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index 4114db2368..e004470c41 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -16,6 +16,7 @@ #include "parser/Tokenizer.h" #include "rfc1738.h" #include "SquidConfig.h" +#include "SquidMath.h" #include "SquidString.h" static const char valid_hostname_chars_u[] = @@ -285,24 +286,19 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl) AnyP::UriScheme scheme; if (method == Http::METHOD_CONNECT) { - /* - * RFC 7230 section 5.3.3: authority-form = authority - * "excluding any userinfo and its "@" delimiter" - * - * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ] - * - * As an HTTP(S) proxy we assume HTTPS (443) if no port provided. - */ - foundPort = 443; + // For CONNECTs, RFC 9110 Section 9.3.6 requires "only the host and + // port number of the tunnel destination, separated by a colon". - // XXX: use tokenizer - auto B = tok.buf(); - const char *url = B.c_str(); + const auto rawHost = parseHost(tok); + Assure(rawHost.length() < sizeof(foundHost)); + SBufToCstring(foundHost, rawHost); - if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1) - if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1) - return false; + if (!tok.skip(':')) + throw TextException("missing required :port in CONNECT target", Here()); + foundPort = parsePort(tok); + if (!tok.remaining().isEmpty()) + throw TextException("garbage after host:port in CONNECT target", Here()); } else { scheme = uriParseScheme(tok); @@ -558,6 +554,89 @@ AnyP::Uri::parseUrn(Parser::Tokenizer &tok) debugs(23, 3, "Split URI into proto=urn, nid=" << nid << ", " << Raw("path",path().rawContent(),path().length())); } +/// Extracts and returns a (suspected but only partially validated) uri-host +/// IPv6address, IPv4address, or reg-name component. This function uses (and +/// quotes) RFC 3986, Section 3.2.2 syntax rules. +SBuf +AnyP::Uri::parseHost(Parser::Tokenizer &tok) const +{ + // host = IP-literal / IPv4address / reg-name + + // XXX: CharacterSets below reject uri-host values containing whitespace + // (e.g., "10.0.0. 1"). That is not a bug, but the uri_whitespace directive + // can be interpreted as if it applies to uri-host and this code. TODO: Fix + // uri_whitespace and the code using it to exclude uri-host (and URI scheme, + // port, etc.) from that directive scope. + + // IP-literal = "[" ( IPv6address / IPvFuture ) "]" + if (tok.skip('[')) { + // Add "." because IPv6address in RFC 3986 includes ls32, which includes + // IPv4address: ls32 = ( h16 ":" h16 ) / IPv4address + // This set rejects IPvFuture that needs a "v" character. + static const CharacterSet IPv6chars = ( + CharacterSet::HEXDIG + CharacterSet("colon", ":") + CharacterSet("period", ".")).rename("IPv6"); + SBuf ipv6ish; + if (!tok.prefix(ipv6ish, IPv6chars)) + throw TextException("malformed or unsupported bracketed IP address in uri-host", Here()); + + if (!tok.skip(']')) + throw TextException("IPv6 address is missing a closing bracket in uri-host", Here()); + + // This rejects bracketed IPv4address and domain names because they lack ":". + if (ipv6ish.find(':') == SBuf::npos) + throw TextException("bracketed IPv6 address is missing a colon in uri-host", Here()); + + // This rejects bracketed non-IP addresses that our caller would have + // otherwise mistaken for a domain name (e.g., '[127.0.0:1]'). + Ip::Address ipv6check; + if (!ipv6check.fromHost(ipv6ish.c_str())) + throw TextException("malformed bracketed IPv6 address in uri-host", Here()); + + return ipv6ish; + } + + // no brackets implies we are looking at IPv4address or reg-name + + // XXX: This code does not detect/reject some bad host values (e.g. "!#$%&" + // and "1.2.3.4.5"). TODO: Add more checks here, after migrating the + // non-CONNECT uri-host parsing code to use us. + + SBuf otherHost; // IPv4address-ish or reg-name-ish; + // ":" is not in TCHAR so we will stop before any port specification + if (tok.prefix(otherHost, CharacterSet::TCHAR)) + return otherHost; + + throw TextException("malformed IPv4 address or host name in uri-host", Here()); +} + +/// Extracts and returns an RFC 3986 URI authority port value (with additional +/// restrictions). The RFC defines port as a possibly empty sequence of decimal +/// digits. We reject certain ports (that are syntactically valid from the RFC +/// point of view) because we are worried that Squid and other traffic handlers +/// may dangerously mishandle unusual (and virtually always bogus) port numbers. +/// Rejected ports cannot be successfully used by Squid itself. +int +AnyP::Uri::parsePort(Parser::Tokenizer &tok) const +{ + if (tok.skip('0')) + throw TextException("zero or zero-prefixed port", Here()); + + int64_t rawPort = 0; + if (!tok.int64(rawPort, 10, false)) // port = *DIGIT + throw TextException("malformed or missing port", Here()); + + Assure(rawPort > 0); + constexpr KnownPort portMax = 65535; // TODO: Make this a class-scope constant and REuse it. + constexpr auto portStorageMax = std::numeric_limits::max(); + static_assert(!Less(portStorageMax, portMax), "Port type can represent the maximum valid port number"); + if (Less(portMax, rawPort)) + throw TextException("huge port", Here()); + + // TODO: Return KnownPort after migrating the non-CONNECT uri-host parsing + // code to use us (so that foundPort "int" disappears or starts using Port). + return NaturalCast(rawPort); +} + void AnyP::Uri::touch() { diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index 1db3c971f0..ddf4712430 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -147,6 +147,9 @@ public: private: void parseUrn(Parser::Tokenizer&); + SBuf parseHost(Parser::Tokenizer &) const; + int parsePort(Parser::Tokenizer &) const; + /** \par * The scheme of this URL. This has the 'type code' smell about it.