/*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
#include "SquidString.h"
#include "URL.h"
-static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const urlpath,
- const char *const host,
- const SBuf &login,
- const int port,
- HttpRequest *request);
-static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
static const char valid_hostname_chars_u[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
assert(0 == matchDomainName("foo.com", ".foo.com"));
assert(0 == matchDomainName(".foo.com", ".foo.com"));
assert(0 == matchDomainName("x.foo.com", ".foo.com"));
+ assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
assert(0 != matchDomainName("x.foo.com", "foo.com"));
assert(0 != matchDomainName("foo.com", "x.foo.com"));
assert(0 != matchDomainName("bar.com", "foo.com"));
assert(0 < matchDomainName("bfoo.com", "afoo.com"));
assert(0 > matchDomainName("afoo.com", "bfoo.com"));
assert(0 < matchDomainName("x-foo.com", ".foo.com"));
+
+ assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+
+ assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
+ assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
+ assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
+ assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
+
/* more cases? */
}
/**
- * urlParseProtocol() takes begin (b) and end (e) pointers, but for
- * backwards compatibility, e defaults to NULL, in which case we
- * assume b is NULL-terminated.
+ * Parse the scheme name from string b, into protocol type.
+ * The string must be 0-terminated.
*/
AnyP::ProtocolType
-urlParseProtocol(const char *b, const char *e)
+urlParseProtocol(const char *b)
{
- /*
- * if e is NULL, b must be NULL terminated and we
- * make e point to the first whitespace character
- * after b.
- */
-
- if (NULL == e)
- e = b + strcspn(b, ":");
-
+ // make e point to the ':' character, or to the terminating 0 when b contains no ':'
+ const char *e = b + strcspn(b, ":");
int len = e - b;
/* test common stuff first */
if (strncasecmp(b, "whois", len) == 0)
return AnyP::PROTO_WHOIS;
+ if (len > 0)
+ return AnyP::PROTO_UNKNOWN;
+
return AnyP::PROTO_NONE;
}
/*
* Parse a URI/URL.
*
- * If the 'request' arg is non-NULL, put parsed values there instead
- * of allocating a new HttpRequest.
+ * Stores parsed values in this URL object.
*
* This abuses HttpRequest as a way of representing the parsed url
* and its components.
* its partial or not (ie, it handles the case of no trailing slash as
* being "end of host with implied path of /".
*/
-HttpRequest *
-urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
+bool
+URL::parse(const HttpRequestMethod& method, const char *url)
{
LOCAL_ARRAY(char, proto, MAX_URL);
LOCAL_ARRAY(char, login, MAX_URL);
- LOCAL_ARRAY(char, host, MAX_URL);
+ LOCAL_ARRAY(char, foundHost, MAX_URL);
LOCAL_ARRAY(char, urlpath, MAX_URL);
char *t = NULL;
char *q = NULL;
- int port;
+ int foundPort;
AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
int l;
int i;
const char *src;
char *dst;
- proto[0] = host[0] = urlpath[0] = login[0] = '\0';
+ proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0';
if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
- /* terminate so it doesn't overflow other buffers */
- *(url + (MAX_URL >> 1)) = '\0';
- debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
- return NULL;
+ debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
+ return false;
}
if (method == Http::METHOD_CONNECT) {
- port = CONNECT_PORT;
+ /*
+ * RFC 7230 section 5.3.3: authority-form = authority
+ * "excluding any userinfo and its "@" delimiter"
+ *
+ * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
+ */
+ foundPort = 443;
- if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
- if (sscanf(url, "%[^:]:%d", host, &port) < 1)
- return NULL;
+ if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
+ if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
+ return false;
} else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
URL::Asterisk().cmp(url) == 0) {
- protocol = AnyP::PROTO_HTTP;
- port = AnyP::UriScheme(protocol).defaultPort();
- return urlParseFinish(method, protocol, url, host, SBuf(), port, request);
- } else if (!strncmp(url, "urn:", 4)) {
- return urnParse(method, url, request);
+ parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */);
+ return true;
+ } else if (strncmp(url, "urn:", 4) == 0) {
+ debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'");
+ debugs(50, 5, "urn=" << (url+4));
+ setScheme(AnyP::PROTO_URN, nullptr);
+ path(url + 4);
+ return true;
} else {
/* Parse the URL: */
src = url;
*dst = *src;
}
if (i >= l)
- return NULL;
+ return false;
*dst = '\0';
/* Then its :// */
if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
- return NULL;
+ return false;
i += 3;
src += 3;
// bug 1881: If we don't get a "/" then we imply it was there
// bug 3074: We could just be given a "?" or "#". These also imply "/"
// bug 3233: whitespace is also a hostname delimiter.
- for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
+ for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
*dst = *src;
}
* been -given- a valid URL and the path is just '/'.
*/
if (i > l)
- return NULL;
+ return false;
*dst = '\0';
// bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
/* We -could- be at the end of the buffer here */
if (i > l)
- return NULL;
+ return false;
/* If the URL path is empty we set it to be "/" */
if (dst == urlpath) {
*dst = '/';
*dst = '\0';
protocol = urlParseProtocol(proto);
- port = AnyP::UriScheme(protocol).defaultPort();
+ foundPort = AnyP::UriScheme(protocol).defaultPort();
/* Is there any login information? (we should eventually parse it above) */
- t = strrchr(host, '@');
+ t = strrchr(foundHost, '@');
if (t != NULL) {
- strncpy((char *) login, (char *) host, sizeof(login)-1);
+ strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
login[sizeof(login)-1] = '\0';
t = strrchr(login, '@');
*t = 0;
- strncpy((char *) host, t + 1, sizeof(host)-1);
- host[sizeof(host)-1] = '\0';
+ strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
+ foundHost[sizeof(foundHost)-1] = '\0';
+ // Bug 4498: URL-unescape the login info after extraction
+ rfc1738_unescape(login);
}
/* Is there any host information? (we should eventually parse it above) */
- if (*host == '[') {
+ if (*foundHost == '[') {
/* strip any IPA brackets. valid under IPv6. */
- dst = host;
+ dst = foundHost;
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
- src = host;
+ src = foundHost;
++src;
- l = strlen(host);
+ l = strlen(foundHost);
i = 1;
for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
*dst = *src;
++dst;
t = dst;
} else {
- t = strrchr(host, ':');
+ t = strrchr(foundHost, ':');
- if (t != strchr(host,':') ) {
+ if (t != strchr(foundHost,':') ) {
/* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
/* therefore we MUST accept the case where they are not bracketed at all. */
}
// Bug 3183 sanity check: If scheme is present, host must be too.
- if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
+ if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') {
debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
- return NULL;
+ return false;
}
if (t && *t == ':') {
*t = '\0';
++t;
- port = atoi(t);
+ foundPort = atoi(t);
}
}
- for (t = host; *t; ++t)
+ for (t = foundHost; *t; ++t)
*t = xtolower(*t);
- if (stringHasWhitespace(host)) {
+ if (stringHasWhitespace(foundHost)) {
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
- t = q = host;
+ t = q = foundHost;
while (*t) {
if (!xisspace(*t)) {
*q = *t;
}
}
- debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+ debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
- if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
- return NULL;
+ if (Config.onoff.check_hostnames &&
+ strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
+ return false;
}
/* For IPV6 addresses also check for a colon */
- if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
- strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
+ if (Config.appendDomain && !strchr(foundHost, '.') && !strchr(foundHost, ':'))
+ strncat(foundHost, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(foundHost) - 1);
/* remove trailing dots from hostnames */
- while ((l = strlen(host)) > 0 && host[--l] == '.')
- host[l] = '\0';
+ while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
+ foundHost[l] = '\0';
/* reject duplicate or leading dots */
- if (strstr(host, "..") || *host == '.') {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
- return NULL;
+ if (strstr(foundHost, "..") || *foundHost == '.') {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
+ return false;
}
- if (port < 1 || port > 65535) {
- debugs(23, 3, "urlParse: Invalid port '" << port << "'");
- return NULL;
+ if (foundPort < 1 || foundPort > 65535) {
+ debugs(23, 3, "Invalid port '" << foundPort << "'");
+ return false;
}
#if HARDCODE_DENY_PORTS
/* These ports are filtered in the default squid.conf, but
* maybe someone wants them hardcoded... */
- if (port == 7 || port == 9 || port == 19) {
- debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
- return NULL;
+ if (foundPort == 7 || foundPort == 9 || foundPort == 19) {
+ debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort);
+ return false;
}
#endif
if (stringHasWhitespace(urlpath)) {
- debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+ debugs(23, 2, "URI has whitespace: {" << url << "}");
switch (Config.uri_whitespace) {
case URI_WHITESPACE_DENY:
- return NULL;
+ return false;
case URI_WHITESPACE_ALLOW:
break;
}
}
- return urlParseFinish(method, protocol, urlpath, host, SBuf(login), port, request);
+ parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort);
+ return true;
}
-/**
- * Update request with parsed URI data. If the request arg is
- * non-NULL, put parsed values there instead of allocating a new
- * HttpRequest.
- */
-static HttpRequest *
-urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const urlpath,
- const char *const host,
- const SBuf &login,
- const int port,
- HttpRequest *request)
+/// Update the URL object with parsed URI data: hands the scheme, path, host, user-info and port to the matching setters, in that order.
+void
+URL::parseFinish(const AnyP::ProtocolType protocol,
+ const char *const protoStr, // scheme text, for unknown protocols; passed through to setScheme()
+ const char *const aUrlPath, // passed to path()
+ const char *const aHost, // passed to host()
+ const SBuf &aLogin, // passed to userInfo()
+ const int aPort) // passed to port()
{
- if (NULL == request)
- request = new HttpRequest(method, protocol, urlpath);
- else {
- request->initHTTP(method, protocol, urlpath);
- safe_free(request->canonical);
- }
-
- request->SetHost(host);
- request->url.userInfo(login);
- request->url.port(port);
- return request;
-}
-
-static HttpRequest *
-urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
-{
- debugs(50, 5, "urnParse: " << urn);
- if (request) {
- request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
- safe_free(request->canonical);
- return request;
- }
-
- return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
+ setScheme(protocol, protoStr); // both the protocol type and its text form are handed over
+ path(aUrlPath);
+ host(aHost);
+ userInfo(aLogin);
+ port(aPort);
}
void
// TODO: most URL will be much shorter, avoid allocating this much
absolute_.reserveCapacity(MAX_URL);
- absolute_.appendf("%s:", getScheme().c_str());
+ absolute_.append(getScheme().image());
+ absolute_.append(":",1);
if (getScheme() != AnyP::PROTO_URN) {
absolute_.append("//", 2);
const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
return absolute_;
}
-const char *
-urlCanonical(HttpRequest * request)
-{
- if (request->canonical)
- return request->canonical;
-
- SBuf url;
- if (request->method.id() == Http::METHOD_CONNECT)
- url = request->url.authority(true); // host:port
- else
- url = request->url.absolute();
-
- return (request->canonical = xstrndup(url.rawContent(), url.length()+1));
-}
-
-/** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
+/** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
* After copying it on in the first place! Would be less code to merge the two with a flag parameter.
* and never copy the query-string part in the first place
*/
{
LOCAL_ARRAY(char, buf, MAX_URL);
- snprintf(buf, sizeof(buf), "%s", urlCanonical(const_cast<HttpRequest *>(request)));
+ snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
buf[sizeof(buf)-1] = '\0';
// URN, CONNECT method, and non-stripped URIs can go straight out
- if (!(request->url.getScheme() == AnyP::PROTO_URN ||
- !Config.onoff.strip_query_terms ||
- request->method == Http::METHOD_CONNECT
- )) {
-
+ if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
// strip anything AFTER a question-mark
// leaving the '?' in place
if (auto t = strchr(buf, '?')) {
}
SBuf authorityForm = req->url.authority(); // host[:port]
- size_t urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
- req->url.getScheme().c_str(),
+ const SBuf &scheme = req->url.getScheme().image();
+ size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
+ SQUIDSBUFPRINT(scheme),
SQUIDSBUFPRINT(req->url.userInfo()),
!req->url.userInfo().isEmpty() ? "@" : "",
SQUIDSBUFPRINT(authorityForm));
// XXX: crops bits in the middle of the combined URL.
lastSlashPos = MAX_URL - urllen - 1;
}
- xstrncpy(&urlbuf[urllen], path.rawContent(), lastSlashPos);
+ SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
urllen += lastSlashPos;
if (urllen + 1 < MAX_URL) {
xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
}
int
-matchDomainName(const char *h, const char *d, bool honorWildcards)
+matchDomainName(const char *h, const char *d, uint flags)
{
int dl;
int hl;
+ const bool hostIncludesSubdomains = (*h == '.');
while ('.' == *h)
++h;
hl = strlen(h);
+ if (hl == 0)
+ return -1;
+
dl = strlen(d);
/*
* is a leading '.'.
*/
- if ('.' == d[0])
- return 0;
- else
+ if ('.' == d[0]) {
+ if (flags & mdnRejectSubsubDomains) {
+ // Check for sub-sub domain and reject
+ while(--hl >= 0 && h[hl] != '.');
+ if (hl < 0) {
+ // No sub-sub domain found, but reject if there is a
+ // leading dot in given host string (which is removed
+ // before the check is started).
+ return hostIncludesSubdomains ? 1 : 0;
+ } else
+ return 1; // sub-sub domain, reject
+ } else
+ return 0;
+ } else
return 1;
}
}
// If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
// then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
// The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
- if (honorWildcards && h[hl] == '*' && h[hl + 1] == '.')
+ if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
return 0;
/*
// we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
// we also support forwarding OPTIONS and TRACE, except for the *-URI ones
if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
- return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
+ return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
if (r->method == Http::METHOD_PURGE)
return 1;
case AnyP::PROTO_HTTPS:
#if USE_OPENSSL
-
rc = 1;
-
- break;
-
+#elif USE_GNUTLS
+ rc = 1;
#else
/*
* Squid can't originate an SSL connection, so it should
* CONNECT instead.
*/
rc = 0;
-
#endif
+ break;
default:
break;
return Host;
}
+URL::URL(AnyP::UriScheme const &aScheme) : // construct from a scheme only; the remaining members get the fixed initial values below
+ scheme_(aScheme),
+ hostIsNumeric_(false),
+ port_(0)
+{
+ *host_=0; // store 0 in the first element of host_ (presumably a char buffer, making the host an empty string - confirm against the class declaration)
+}
+