From: Amos Jeffries Date: Mon, 26 Jun 2017 02:14:42 +0000 (+1200) Subject: Bug 1961 partial: move urlParse() into class URL X-Git-Tag: M-staged-PR71~91 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=91489e450151447a010af2f8f283edb06111da3c;p=thirdparty%2Fsquid.git Bug 1961 partial: move urlParse() into class URL * rename local variables in urlParse() to avoid symbol clashes with class URL members and methods. * move HttpRequest method assignment out to the single caller which actually needed it. Others all passed in the method which was already set on the HttpRequest object passed. * removed now needless HttpRequest parameter of urlParse() * rename urlParse as a class URL method * make URL::parseFinish() private * remove unnecessary CONNECT_PORT define * add RFC documentation for 'CONNECT' URI handling * fixed two XXX in URL-rewrite handling doing unnecessary HttpRequest object creation and destruction cycles on invalid URL-rewrite helper output. --- diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc index 00c0fdb388..4366f66be1 100644 --- a/src/HttpRequest.cc +++ b/src/HttpRequest.cc @@ -332,7 +332,7 @@ HttpRequest::parseFirstLine(const char *start, const char *end) * (char *) end = '\0'; // temp terminate URI, XXX dangerous? - const bool ret = urlParse(method, (char *) start, *this); + const bool ret = url.parse(method, (char *) start); * (char *) end = save; @@ -523,8 +523,10 @@ HttpRequest * HttpRequest::FromUrl(char * url, const MasterXaction::Pointer &mx, const HttpRequestMethod& method) { std::unique_ptr req(new HttpRequest(mx)); - if (urlParse(method, url, *req)) + if (req->url.parse(method, url)) { + req->method = method; return req.release(); + } return nullptr; } diff --git a/src/URL.h b/src/URL.h index a45189ea91..88e856698c 100644 --- a/src/URL.h +++ b/src/URL.h @@ -53,8 +53,7 @@ public: } void touch(); ///< clear the cached URI display forms - /// Update the URL object with parsed URI data. - void parseFinish(const AnyP::ProtocolType, const char *const protoStr, const char *const path, const char *const host, const SBuf &login, const int port); + bool parse(const HttpRequestMethod &, char *url); AnyP::UriScheme const & getScheme() const {return scheme_;} @@ -107,6 +106,8 @@ public: SBuf &absolute() const; private: + void parseFinish(const AnyP::ProtocolType, const char *const, const char *const, const char *const, const SBuf &, const int); + /** \par * The scheme of this URL. This has the 'type code' smell about it. diff --git a/src/adaptation/ServiceConfig.cc b/src/adaptation/ServiceConfig.cc index 1d4697bc84..648c127e09 100644 --- a/src/adaptation/ServiceConfig.cc +++ b/src/adaptation/ServiceConfig.cc @@ -189,7 +189,7 @@ bool Adaptation::ServiceConfig::grokUri(const char *value) { // TODO: find core code that parses URLs and extracts various parts - // AYJ: most of this is duplicate of urlParse() in src/url.cc + // AYJ: most of this is duplicate of URL::parse() in src/url.cc if (!value || !*value) { debugs(3, DBG_CRITICAL, HERE << cfg_filename << ':' << config_lineno << ": " << diff --git a/src/adaptation/ecap/MessageRep.cc b/src/adaptation/ecap/MessageRep.cc index be572e9437..31e37ba3cb 100644 --- a/src/adaptation/ecap/MessageRep.cc +++ b/src/adaptation/ecap/MessageRep.cc @@ -203,11 +203,11 @@ Adaptation::Ecap::RequestLineRep::RequestLineRep(HttpRequest &aMessage): void Adaptation::Ecap::RequestLineRep::uri(const Area &aUri) { - // TODO: if method is not set, urlPath will assume it is not connect; - // Can we change urlParse API to remove the method parameter? - // TODO: optimize: urlPath should take constant URL buffer + // TODO: if method is not set, URL::parse will assume it is not connect; + // Can we change URL::parse API to remove the method parameter? + // TODO: optimize: URL::parse should take constant URL buffer char *buf = xstrdup(aUri.toString().c_str()); - const bool ok = urlParse(theMessage.method, buf, theMessage); + const bool ok = theMessage.url.parse(theMessage.method, buf); xfree(buf); Must(ok); } diff --git a/src/client_side_request.cc b/src/client_side_request.cc index 2822d1a7a8..664ddf0e1d 100644 --- a/src/client_side_request.cc +++ b/src/client_side_request.cc @@ -352,9 +352,9 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre } /* - * now update the headers in request with our supplied headers. urlParse - * should return a blank header set, but we use Update to be sure of - * correctness. + * now update the headers in request with our supplied headers. + * HttpRequest::FromUrl() should return a blank header set, but + * we use Update to be sure of correctness. */ if (header) request->header.update(header); @@ -1264,10 +1264,10 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply) // prevent broken helpers causing too much damage. If old URL == new URL skip the re-write. if (urlNote != NULL && strcmp(urlNote, http->uri)) { - // XXX: validate the URL properly *without* generating a whole new request object right here. - // XXX: the clone() should be done only AFTER we know the new URL is valid. - HttpRequest *new_request = old_request->clone(); - if (urlParse(old_request->method, const_cast(urlNote), *new_request)) { + URL tmpUrl; + if (tmpUrl.parse(old_request->method, const_cast(urlNote))) { + HttpRequest *new_request = old_request->clone(); + new_request->url = tmpUrl; debugs(61, 2, "URL-rewriter diverts URL from " << old_request->effectiveRequestUri() << " to " << new_request->effectiveRequestUri()); // update the new request to flag the re-writing was done on it @@ -1289,7 +1289,6 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply) } else { debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " << old_request->method << " " << urlNote << " " << old_request->http_ver); - delete new_request; } } } diff --git a/src/defines.h b/src/defines.h index feb6189705..4cb0249115 100644 --- a/src/defines.h +++ b/src/defines.h @@ -93,8 +93,6 @@ #define NTLM_CHALLENGE_SZ 300 -#define CONNECT_PORT 443 - #define current_stacksize(stack) ((stack)->top - (stack)->base) /* logfile status */ diff --git a/src/redirect.cc b/src/redirect.cc index 5d0ac02e12..c094146ecc 100644 --- a/src/redirect.cc +++ b/src/redirect.cc @@ -108,9 +108,9 @@ redirectHandleReply(void *data, const Helper::Reply &reply) // if we still have anything in other() after all that // parse it into status=, url= and rewrite-url= keys if (replySize) { - /* 2012-06-28: This cast is due to urlParse() truncating too-long URLs itself. + /* 2012-06-28: This cast is due to URL::parse() truncating too-long URLs itself. * At this point altering the helper buffer in that way is not harmful, but annoying. - * When Bug 1961 is resolved and urlParse has a const API, this needs to die. + * When Bug 1961 is resolved and URL::parse has a const API, this needs to die. */ MemBuf replyBuffer; replyBuffer.init(replySize, replySize); diff --git a/src/tests/stub_url.cc b/src/tests/stub_url.cc index 8ca856b702..2a7a6be718 100644 --- a/src/tests/stub_url.cc +++ b/src/tests/stub_url.cc @@ -14,6 +14,7 @@ #include "URL.h" URL::URL(AnyP::UriScheme const &) {STUB} void URL::touch() STUB +bool URL::parse(const HttpRequestMethod&, char *) STUB_RETVAL(true) void URL::host(const char *) STUB static SBuf nil; const SBuf &URL::path() const STUB_RETVAL(nil) @@ -30,7 +31,6 @@ const SBuf &URL::Asterisk() SBuf &URL::authority(bool) const STUB_RETVAL(nil) SBuf &URL::absolute() const STUB_RETVAL(nil) void urlInitialize() STUB -bool urlParse(const HttpRequestMethod&, char *, HttpRequest &) STUB_RETVAL(true) char *urlCanonicalClean(const HttpRequest *) STUB_RETVAL(nullptr) const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr) bool urlIsRelative(const char *) STUB_RETVAL(false) diff --git a/src/url.cc b/src/url.cc index dd724dbb1a..35bbcc69fb 100644 --- a/src/url.cc +++ b/src/url.cc @@ -188,46 +188,52 @@ urlParseProtocol(const char *b) * being "end of host with implied path of /". */ bool -urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) +URL::parse(const HttpRequestMethod& method, char *url) { LOCAL_ARRAY(char, proto, MAX_URL); LOCAL_ARRAY(char, login, MAX_URL); - LOCAL_ARRAY(char, host, MAX_URL); + LOCAL_ARRAY(char, foundHost, MAX_URL); LOCAL_ARRAY(char, urlpath, MAX_URL); char *t = NULL; char *q = NULL; - int port; + int foundPort; AnyP::ProtocolType protocol = AnyP::PROTO_NONE; int l; int i; const char *src; char *dst; - proto[0] = host[0] = urlpath[0] = login[0] = '\0'; + proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0'; if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) { /* terminate so it doesn't overflow other buffers */ *(url + (MAX_URL >> 1)) = '\0'; - debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)"); + debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)"); return false; } if (method == Http::METHOD_CONNECT) { - port = CONNECT_PORT; + /* + * RFC 7230 section 5.3.3: authority-form = authority + * "excluding any userinfo and its "@" delimiter" + * + * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ] + * + * As an HTTP(S) proxy we assume HTTPS (443) if no port provided. + */ + foundPort = 443; - if (sscanf(url, "[%[^]]]:%d", host, &port) < 1) - if (sscanf(url, "%[^:]:%d", host, &port) < 1) + if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1) + if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1) return false; } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) && URL::Asterisk().cmp(url) == 0) { - request.method = method; - request.url.parseFinish(AnyP::PROTO_HTTP, nullptr, url, host, SBuf(), 80); + parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */); return true; } else if (strncmp(url, "urn:", 4) == 0) { debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'"); debugs(50, 5, "urn=" << (url+4)); - request.method = method; - request.url.setScheme(AnyP::PROTO_URN, nullptr); - request.url.path(url + 4); + setScheme(AnyP::PROTO_URN, nullptr); + path(url + 4); return true; } else { /* Parse the URL: */ @@ -251,7 +257,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) // bug 1881: If we don't get a "/" then we imply it was there // bug 3074: We could just be given a "?" or "#". These also imply "/" // bug 3233: whitespace is also a hostname delimiter. - for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) { + for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) { *dst = *src; } @@ -287,29 +293,29 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) *dst = '\0'; protocol = urlParseProtocol(proto); - port = AnyP::UriScheme(protocol).defaultPort(); + foundPort = AnyP::UriScheme(protocol).defaultPort(); /* Is there any login information? (we should eventually parse it above) */ - t = strrchr(host, '@'); + t = strrchr(foundHost, '@'); if (t != NULL) { - strncpy((char *) login, (char *) host, sizeof(login)-1); + strncpy((char *) login, (char *) foundHost, sizeof(login)-1); login[sizeof(login)-1] = '\0'; t = strrchr(login, '@'); *t = 0; - strncpy((char *) host, t + 1, sizeof(host)-1); - host[sizeof(host)-1] = '\0'; + strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1); + foundHost[sizeof(foundHost)-1] = '\0'; // Bug 4498: URL-unescape the login info after extraction rfc1738_unescape(login); } /* Is there any host information? (we should eventually parse it above) */ - if (*host == '[') { + if (*foundHost == '[') { /* strip any IPA brackets. valid under IPv6. */ - dst = host; + dst = foundHost; /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */ - src = host; + src = foundHost; ++src; - l = strlen(host); + l = strlen(foundHost); i = 1; for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) { *dst = *src; @@ -324,9 +330,9 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) ++dst; t = dst; } else { - t = strrchr(host, ':'); + t = strrchr(foundHost, ':'); - if (t != strchr(host,':') ) { + if (t != strchr(foundHost,':') ) { /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */ /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */ /* therefore we MUST accept the case where they are not bracketed at all. */ @@ -335,7 +341,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) } // Bug 3183 sanity check: If scheme is present, host must be too. - if (protocol != AnyP::PROTO_NONE && host[0] == '\0') { + if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') { debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details."); return false; } @@ -343,16 +349,16 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) if (t && *t == ':') { *t = '\0'; ++t; - port = atoi(t); + foundPort = atoi(t); } } - for (t = host; *t; ++t) + for (t = foundHost; *t; ++t) *t = xtolower(*t); - if (stringHasWhitespace(host)) { + if (stringHasWhitespace(foundHost)) { if (URI_WHITESPACE_STRIP == Config.uri_whitespace) { - t = q = host; + t = q = foundHost; while (*t) { if (!xisspace(*t)) { *q = *t; @@ -364,43 +370,44 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) } } - debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'"); + debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'"); - if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) { - debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'"); + if (Config.onoff.check_hostnames && + strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) { + debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'"); return false; } /* For IPV6 addresses also check for a colon */ - if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':')) - strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1); + if (Config.appendDomain && !strchr(foundHost, '.') && !strchr(foundHost, ':')) + strncat(foundHost, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(foundHost) - 1); /* remove trailing dots from hostnames */ - while ((l = strlen(host)) > 0 && host[--l] == '.') - host[l] = '\0'; + while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.') + foundHost[l] = '\0'; /* reject duplicate or leading dots */ - if (strstr(host, "..") || *host == '.') { - debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'"); + if (strstr(foundHost, "..") || *foundHost == '.') { + debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'"); return false; } - if (port < 1 || port > 65535) { - debugs(23, 3, "urlParse: Invalid port '" << port << "'"); + if (foundPort < 1 || foundPort > 65535) { + debugs(23, 3, "Invalid port '" << foundPort << "'"); return false; } #if HARDCODE_DENY_PORTS /* These ports are filtered in the default squid.conf, but * maybe someone wants them hardcoded... */ - if (port == 7 || port == 9 || port == 19) { - debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port); + if (foundPort == 7 || foundPort == 9 || foundPort == 19) { + debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort); return false; } #endif if (stringHasWhitespace(urlpath)) { - debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}"); + debugs(23, 2, "URI has whitespace: {" << url << "}"); switch (Config.uri_whitespace) { @@ -433,8 +440,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest &request) } } - request.method = method; - request.url.parseFinish(protocol, proto, urlpath, host, SBuf(login), port); + parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort); return true; }