/*
- * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
#include "SquidString.h"
#include "URL.h"
-static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const protoStr,
- const char *const urlpath,
- const char *const host,
- const SBuf &login,
- const int port,
- HttpRequest *request);
-static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
static const char valid_hostname_chars_u[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
/*
* Parse a URI/URL.
*
- * If the 'request' arg is non-NULL, put parsed values there instead
- * of allocating a new HttpRequest.
+ * Stores the parsed values in this URL object.
*
* This abuses HttpRequest as a way of representing the parsed url
* and its components.
* its partial or not (ie, it handles the case of no trailing slash as
* being "end of host with implied path of /".
*/
-HttpRequest *
-urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
+bool
+URL::parse(const HttpRequestMethod& method, const char *url)
{
LOCAL_ARRAY(char, proto, MAX_URL);
LOCAL_ARRAY(char, login, MAX_URL);
- LOCAL_ARRAY(char, host, MAX_URL);
+ LOCAL_ARRAY(char, foundHost, MAX_URL);
LOCAL_ARRAY(char, urlpath, MAX_URL);
char *t = NULL;
char *q = NULL;
- int port;
+ int foundPort;
AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
int l;
int i;
const char *src;
char *dst;
- proto[0] = host[0] = urlpath[0] = login[0] = '\0';
+ proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0';
if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
- /* terminate so it doesn't overflow other buffers */
- *(url + (MAX_URL >> 1)) = '\0';
- debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
- return NULL;
+ debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
+ return false;
}
if (method == Http::METHOD_CONNECT) {
- port = CONNECT_PORT;
+ /*
+ * RFC 7230 section 5.3.3: authority-form = authority
+ * "excluding any userinfo and its "@" delimiter"
+ *
+ * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
+ */
+ foundPort = 443;
- if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
- if (sscanf(url, "%[^:]:%d", host, &port) < 1)
- return NULL;
+ if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
+ if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
+ return false;
} else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
URL::Asterisk().cmp(url) == 0) {
- protocol = AnyP::PROTO_HTTP;
- port = 80; // or the slow way ... AnyP::UriScheme(protocol,"http").defaultPort();
- return urlParseFinish(method, protocol, "http", url, host, SBuf(), port, request);
- } else if (!strncmp(url, "urn:", 4)) {
- return urnParse(method, url, request);
+ parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */);
+ return true;
+ } else if (strncmp(url, "urn:", 4) == 0) {
+ debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'");
+ debugs(50, 5, "urn=" << (url+4));
+ setScheme(AnyP::PROTO_URN, nullptr);
+ path(url + 4);
+ return true;
} else {
/* Parse the URL: */
src = url;
*dst = *src;
}
if (i >= l)
- return NULL;
+ return false;
*dst = '\0';
/* Then its :// */
if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
- return NULL;
+ return false;
i += 3;
src += 3;
// bug 1881: If we don't get a "/" then we imply it was there
// bug 3074: We could just be given a "?" or "#". These also imply "/"
// bug 3233: whitespace is also a hostname delimiter.
- for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
+ for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
*dst = *src;
}
* been -given- a valid URL and the path is just '/'.
*/
if (i > l)
- return NULL;
+ return false;
*dst = '\0';
// bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
/* We -could- be at the end of the buffer here */
if (i > l)
- return NULL;
+ return false;
/* If the URL path is empty we set it to be "/" */
if (dst == urlpath) {
*dst = '/';
*dst = '\0';
protocol = urlParseProtocol(proto);
- port = AnyP::UriScheme(protocol).defaultPort();
+ foundPort = AnyP::UriScheme(protocol).defaultPort();
/* Is there any login information? (we should eventually parse it above) */
- t = strrchr(host, '@');
+ t = strrchr(foundHost, '@');
if (t != NULL) {
- strncpy((char *) login, (char *) host, sizeof(login)-1);
+ strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
login[sizeof(login)-1] = '\0';
t = strrchr(login, '@');
*t = 0;
- strncpy((char *) host, t + 1, sizeof(host)-1);
- host[sizeof(host)-1] = '\0';
+ strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
+ foundHost[sizeof(foundHost)-1] = '\0';
// Bug 4498: URL-unescape the login info after extraction
rfc1738_unescape(login);
}
/* Is there any host information? (we should eventually parse it above) */
- if (*host == '[') {
+ if (*foundHost == '[') {
/* strip any IPA brackets. valid under IPv6. */
- dst = host;
+ dst = foundHost;
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
- src = host;
+ src = foundHost;
++src;
- l = strlen(host);
+ l = strlen(foundHost);
i = 1;
for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
*dst = *src;
++dst;
t = dst;
} else {
- t = strrchr(host, ':');
+ t = strrchr(foundHost, ':');
- if (t != strchr(host,':') ) {
+ if (t != strchr(foundHost,':') ) {
/* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
/* therefore we MUST accept the case where they are not bracketed at all. */
}
// Bug 3183 sanity check: If scheme is present, host must be too.
- if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
+ if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') {
debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
- return NULL;
+ return false;
}
if (t && *t == ':') {
*t = '\0';
++t;
- port = atoi(t);
+ foundPort = atoi(t);
}
}
- for (t = host; *t; ++t)
+ for (t = foundHost; *t; ++t)
*t = xtolower(*t);
- if (stringHasWhitespace(host)) {
+ if (stringHasWhitespace(foundHost)) {
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
- t = q = host;
+ t = q = foundHost;
while (*t) {
if (!xisspace(*t)) {
*q = *t;
}
}
- debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+ debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
- if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
- return NULL;
+ if (Config.onoff.check_hostnames &&
+ strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
+ return false;
}
/* For IPV6 addresses also check for a colon */
- if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
- strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
+ if (Config.appendDomain && !strchr(foundHost, '.') && !strchr(foundHost, ':'))
+ strncat(foundHost, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(foundHost) - 1);
/* remove trailing dots from hostnames */
- while ((l = strlen(host)) > 0 && host[--l] == '.')
- host[l] = '\0';
+ while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
+ foundHost[l] = '\0';
/* reject duplicate or leading dots */
- if (strstr(host, "..") || *host == '.') {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
- return NULL;
+ if (strstr(foundHost, "..") || *foundHost == '.') {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
+ return false;
}
- if (port < 1 || port > 65535) {
- debugs(23, 3, "urlParse: Invalid port '" << port << "'");
- return NULL;
+ if (foundPort < 1 || foundPort > 65535) {
+ debugs(23, 3, "Invalid port '" << foundPort << "'");
+ return false;
}
#if HARDCODE_DENY_PORTS
/* These ports are filtered in the default squid.conf, but
* maybe someone wants them hardcoded... */
- if (port == 7 || port == 9 || port == 19) {
- debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
- return NULL;
+ if (foundPort == 7 || foundPort == 9 || foundPort == 19) {
+ debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort);
+ return false;
}
#endif
if (stringHasWhitespace(urlpath)) {
- debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+ debugs(23, 2, "URI has whitespace: {" << url << "}");
switch (Config.uri_whitespace) {
case URI_WHITESPACE_DENY:
- return NULL;
+ return false;
case URI_WHITESPACE_ALLOW:
break;
}
}
- return urlParseFinish(method, protocol, proto, urlpath, host, SBuf(login), port, request);
+ parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort);
+ return true;
}
-/**
- * Update request with parsed URI data. If the request arg is
- * non-NULL, put parsed values there instead of allocating a new
- * HttpRequest.
- */
-static HttpRequest *
-urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const protoStr, // for unknown protocols
- const char *const urlpath,
- const char *const host,
- const SBuf &login,
- const int port,
- HttpRequest *request)
-{
- if (NULL == request)
- request = new HttpRequest(method, protocol, protoStr, urlpath);
- else {
- request->initHTTP(method, protocol, protoStr, urlpath);
- }
-
- request->url.host(host);
- request->url.userInfo(login);
- request->url.port(port);
- return request;
-}
-
-static HttpRequest *
-urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
+/// Update the URL object with parsed URI data.
+void
+URL::parseFinish(const AnyP::ProtocolType protocol,
+ const char *const protoStr, // for unknown protocols
+ const char *const aUrlPath,
+ const char *const aHost,
+ const SBuf &aLogin,
+ const int aPort)
{
- debugs(50, 5, "urnParse: " << urn);
- if (request) {
- request->initHTTP(method, AnyP::PROTO_URN, "urn", urn + 4);
- return request;
- }
-
- return new HttpRequest(method, AnyP::PROTO_URN, "urn", urn + 4);
+ setScheme(protocol, protoStr);
+ path(aUrlPath);
+ host(aHost);
+ userInfo(aLogin);
+ port(aPort);
}
void
case AnyP::PROTO_HTTPS:
#if USE_OPENSSL
rc = 1;
+#elif USE_GNUTLS
+ rc = 1;
#else
/*
* Squid can't originate an SSL connection, so it should