-
/*
- * DEBUG: section 23 URL Parsing
- * AUTHOR: Duane Wessels
- *
- * SQUID Web Proxy Cache http://www.squid-cache.org/
- * ----------------------------------------------------------
- *
- * Squid is the result of efforts by numerous individuals from
- * the Internet community; see the CONTRIBUTORS file for full
- * details. Many organizations have provided support for Squid's
- * development; see the SPONSORS file for full details. Squid is
- * Copyrighted (C) 2001 by the Regents of the University of
- * California; see the COPYRIGHT file for full details. Squid
- * incorporates software developed and/or copyrighted by other
- * sources; see the CREDITS file for full details.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
*
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
*/
+/* DEBUG: section 23 URL Parsing */
+
#include "squid.h"
#include "globals.h"
#include "HttpRequest.h"
#include "SquidString.h"
#include "URL.h"
-static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const urlpath,
- const char *const host,
- const char *const login,
- const int port,
- HttpRequest *request);
-static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
static const char valid_hostname_chars_u[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"[:]"
;
+/// Shared "*" string. URL::parse() compares OPTIONS/TRACE request URLs
+/// against it; every caller gets a reference to the same static object.
+const SBuf &
+URL::Asterisk()
+{
+ static const SBuf asteriskUri("*");
+ return asteriskUri;
+}
+
+/// Shared "/" string. URL::path() returns it in place of an empty
+/// HTTP(S) path; every caller gets a reference to the same static object.
+const SBuf &
+URL::SlashPath()
+{
+ static const SBuf rootPath("/");
+ return rootPath;
+}
+
+/// Sets the host part of this URL from the 0-terminated string src.
+/// The string is first assigned to hostAddr_; depending on the outcome the
+/// host is recorded either verbatim or as rendered by hostAddr_, and
+/// hostIsNumeric_ is updated to match. Cached URL strings are dropped.
+void
+URL::host(const char *src)
+{
+ hostAddr_.setEmpty();
+ hostAddr_ = src;
+ if (hostAddr_.isAnyAddr()) {
+ // hostAddr_ is still the "any" address after the assignment
+ // (presumably src is not an IP literal): keep src as given.
+ xstrncpy(host_, src, sizeof(host_));
+ hostIsNumeric_ = false;
+ } else {
+ // hostAddr_ holds a specific address: let it produce the host string.
+ hostAddr_.toHostStr(host_, sizeof(host_));
+ debugs(23, 3, "given IP: " << hostAddr_);
+ hostIsNumeric_ = true;
+ }
+ // the host changed, so strings cached from the old value are stale
+ touch();
+}
+
+/// Returns the stored path, except that an empty path on an http or https
+/// URL is reported as "/".
+const SBuf &
+URL::path() const
+{
+ // A non-empty path is always returned as stored.
+ if (!path_.isEmpty())
+ return path_;
+
+ // RFC 3986 section 3.3 allows an empty path (path-abempty), and we must
+ // accept it as input. RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 require a
+ // non-empty path when sending and using HTTP URLs, so default to "/".
+ if (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS)
+ return SlashPath();
+
+ return path_;
+}
+
void
urlInitialize(void)
{
assert(0 == matchDomainName("foo.com", ".foo.com"));
assert(0 == matchDomainName(".foo.com", ".foo.com"));
assert(0 == matchDomainName("x.foo.com", ".foo.com"));
+ assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
assert(0 != matchDomainName("x.foo.com", "foo.com"));
assert(0 != matchDomainName("foo.com", "x.foo.com"));
assert(0 != matchDomainName("bar.com", "foo.com"));
assert(0 < matchDomainName("bfoo.com", "afoo.com"));
assert(0 > matchDomainName("afoo.com", "bfoo.com"));
assert(0 < matchDomainName("x-foo.com", ".foo.com"));
+ /* mdnRejectSubsubDomains: ".foo.com" still matches itself and "x.foo.com", but no longer "y.x.foo.com" or ".x.foo.com" */
+ assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+ assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+ /* mdnHonorWildcards: a "*.foo.com" host matches "x.foo.com", ".x.foo.com" and ".foo.com", but not the bare "foo.com" */
+ assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
+ assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
+ assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
+ assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
+
/* more cases? */
}
/**
- * urlParseProtocol() takes begin (b) and end (e) pointers, but for
- * backwards compatibility, e defaults to NULL, in which case we
- * assume b is NULL-terminated.
+ * Parse the scheme name from string b, into protocol type.
+ * The string must be 0-terminated.
*/
AnyP::ProtocolType
-urlParseProtocol(const char *b, const char *e)
+urlParseProtocol(const char *b)
{
- /*
- * if e is NULL, b must be NULL terminated and we
- * make e point to the first whitespace character
- * after b.
- */
-
- if (NULL == e)
- e = b + strcspn(b, ":");
-
+ // make e point to the ':' character
+ const char *e = b + strcspn(b, ":");
int len = e - b;
/* test common stuff first */
if (strncasecmp(b, "whois", len) == 0)
return AnyP::PROTO_WHOIS;
- if (strncasecmp(b, "internal", len) == 0)
- return AnyP::PROTO_INTERNAL;
+ if (len > 0)
+ return AnyP::PROTO_UNKNOWN;
return AnyP::PROTO_NONE;
}
-int
-urlDefaultPort(AnyP::ProtocolType p)
-{
- switch (p) {
-
- case AnyP::PROTO_HTTP:
- return 80;
-
- case AnyP::PROTO_HTTPS:
- return 443;
-
- case AnyP::PROTO_FTP:
- return 21;
-
- case AnyP::PROTO_COAP:
- case AnyP::PROTO_COAPS:
- // coaps:// default is TBA as of draft-ietf-core-coap-08.
- // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
- return 5683;
-
- case AnyP::PROTO_GOPHER:
- return 70;
-
- case AnyP::PROTO_WAIS:
- return 210;
-
- case AnyP::PROTO_CACHE_OBJECT:
-
- case AnyP::PROTO_INTERNAL:
- return CACHE_HTTP_PORT;
-
- case AnyP::PROTO_WHOIS:
- return 43;
-
- default:
- return 0;
- }
-}
-
/*
* Parse a URI/URL.
*
- * If the 'request' arg is non-NULL, put parsed values there instead
- * of allocating a new HttpRequest.
+ * Stores parsed values in the `request` argument.
*
* This abuses HttpRequest as a way of representing the parsed url
* and its components.
* its partial or not (ie, it handles the case of no trailing slash as
* being "end of host with implied path of /".
*/
-HttpRequest *
-urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
+bool
+URL::parse(const HttpRequestMethod& method, const char *url)
{
LOCAL_ARRAY(char, proto, MAX_URL);
LOCAL_ARRAY(char, login, MAX_URL);
- LOCAL_ARRAY(char, host, MAX_URL);
+ LOCAL_ARRAY(char, foundHost, MAX_URL);
LOCAL_ARRAY(char, urlpath, MAX_URL);
char *t = NULL;
char *q = NULL;
- int port;
+ int foundPort;
AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
int l;
int i;
const char *src;
char *dst;
- proto[0] = host[0] = urlpath[0] = login[0] = '\0';
+ proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0';
if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
- /* terminate so it doesn't overflow other buffers */
- *(url + (MAX_URL >> 1)) = '\0';
- debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
- return NULL;
+ debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
+ return false;
}
if (method == Http::METHOD_CONNECT) {
- port = CONNECT_PORT;
+ /*
+ * RFC 7230 section 5.3.3: authority-form = authority
+ * "excluding any userinfo and its "@" delimiter"
+ *
+ * RFC 3986 section 3.2: authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
+ */
+ foundPort = 443;
- if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
- if (sscanf(url, "%[^:]:%d", host, &port) < 1)
- return NULL;
+ if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
+ if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
+ return false;
} else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
- strcmp(url, "*") == 0) {
- protocol = AnyP::PROTO_HTTP;
- port = urlDefaultPort(protocol);
- return urlParseFinish(method, protocol, url, host, login, port, request);
- } else if (!strncmp(url, "urn:", 4)) {
- return urnParse(method, url, request);
+ URL::Asterisk().cmp(url) == 0) {
+ parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */);
+ return true;
+ } else if (strncmp(url, "urn:", 4) == 0) {
+ debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'");
+ debugs(50, 5, "urn=" << (url+4));
+ setScheme(AnyP::PROTO_URN, nullptr);
+ path(url + 4);
+ return true;
} else {
/* Parse the URL: */
src = url;
*dst = *src;
}
if (i >= l)
- return NULL;
+ return false;
*dst = '\0';
/* Then its :// */
if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
- return NULL;
+ return false;
i += 3;
src += 3;
// bug 1881: If we don't get a "/" then we imply it was there
// bug 3074: We could just be given a "?" or "#". These also imply "/"
// bug 3233: whitespace is also a hostname delimiter.
- for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
+ for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
*dst = *src;
}
* been -given- a valid URL and the path is just '/'.
*/
if (i > l)
- return NULL;
+ return false;
*dst = '\0';
// bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
/* We -could- be at the end of the buffer here */
if (i > l)
- return NULL;
+ return false;
/* If the URL path is empty we set it to be "/" */
if (dst == urlpath) {
*dst = '/';
*dst = '\0';
protocol = urlParseProtocol(proto);
- port = urlDefaultPort(protocol);
+ foundPort = AnyP::UriScheme(protocol).defaultPort();
/* Is there any login information? (we should eventually parse it above) */
- t = strrchr(host, '@');
+ t = strrchr(foundHost, '@');
if (t != NULL) {
- strncpy((char *) login, (char *) host, sizeof(login)-1);
+ strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
login[sizeof(login)-1] = '\0';
t = strrchr(login, '@');
*t = 0;
- strncpy((char *) host, t + 1, sizeof(host)-1);
- host[sizeof(host)-1] = '\0';
+ strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
+ foundHost[sizeof(foundHost)-1] = '\0';
+ // Bug 4498: URL-unescape the login info after extraction
+ rfc1738_unescape(login);
}
/* Is there any host information? (we should eventually parse it above) */
- if (*host == '[') {
+ if (*foundHost == '[') {
/* strip any IPA brackets. valid under IPv6. */
- dst = host;
+ dst = foundHost;
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
- src = host;
+ src = foundHost;
++src;
- l = strlen(host);
+ l = strlen(foundHost);
i = 1;
for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
*dst = *src;
++dst;
t = dst;
} else {
- t = strrchr(host, ':');
+ t = strrchr(foundHost, ':');
- if (t != strchr(host,':') ) {
+ if (t != strchr(foundHost,':') ) {
/* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
/* therefore we MUST accept the case where they are not bracketed at all. */
}
// Bug 3183 sanity check: If scheme is present, host must be too.
- if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
+ if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') {
debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
- return NULL;
+ return false;
}
if (t && *t == ':') {
*t = '\0';
++t;
- port = atoi(t);
+ foundPort = atoi(t);
}
}
- for (t = host; *t; ++t)
+ for (t = foundHost; *t; ++t)
*t = xtolower(*t);
- if (stringHasWhitespace(host)) {
+ if (stringHasWhitespace(foundHost)) {
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
- t = q = host;
+ t = q = foundHost;
while (*t) {
if (!xisspace(*t)) {
*q = *t;
}
}
- debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+ debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
- if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
- return NULL;
+ if (Config.onoff.check_hostnames &&
+ strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
+ return false;
}
/* For IPV6 addresses also check for a colon */
- if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
- strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
+ if (Config.appendDomain && !strchr(foundHost, '.') && !strchr(foundHost, ':'))
+ strncat(foundHost, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(foundHost) - 1);
/* remove trailing dots from hostnames */
- while ((l = strlen(host)) > 0 && host[--l] == '.')
- host[l] = '\0';
+ while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
+ foundHost[l] = '\0';
/* reject duplicate or leading dots */
- if (strstr(host, "..") || *host == '.') {
- debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
- return NULL;
+ if (strstr(foundHost, "..") || *foundHost == '.') {
+ debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
+ return false;
}
- if (port < 1 || port > 65535) {
- debugs(23, 3, "urlParse: Invalid port '" << port << "'");
- return NULL;
+ if (foundPort < 1 || foundPort > 65535) {
+ debugs(23, 3, "Invalid port '" << foundPort << "'");
+ return false;
}
#if HARDCODE_DENY_PORTS
/* These ports are filtered in the default squid.conf, but
* maybe someone wants them hardcoded... */
- if (port == 7 || port == 9 || port == 19) {
- debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
- return NULL;
+ if (foundPort == 7 || foundPort == 9 || foundPort == 19) {
+ debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort);
+ return false;
}
#endif
if (stringHasWhitespace(urlpath)) {
- debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+ debugs(23, 2, "URI has whitespace: {" << url << "}");
switch (Config.uri_whitespace) {
case URI_WHITESPACE_DENY:
- return NULL;
+ return false;
case URI_WHITESPACE_ALLOW:
break;
}
}
- return urlParseFinish(method, protocol, urlpath, host, login, port, request);
+ parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort);
+ return true;
}
-/**
- * Update request with parsed URI data. If the request arg is
- * non-NULL, put parsed values there instead of allocating a new
- * HttpRequest.
- */
-static HttpRequest *
-urlParseFinish(const HttpRequestMethod& method,
- const AnyP::ProtocolType protocol,
- const char *const urlpath,
- const char *const host,
- const char *const login,
- const int port,
- HttpRequest *request)
+/// Update the URL object with parsed URI data: hands each argument to the matching setter (setScheme, path, host, userInfo, port), in that order.
+void
+URL::parseFinish(const AnyP::ProtocolType protocol,
+ const char *const protoStr, // for unknown protocols
+ const char *const aUrlPath, // forwarded to path()
+ const char *const aHost, // forwarded to host()
+ const SBuf &aLogin, // forwarded to userInfo()
+ const int aPort) // forwarded to port()
{
- if (NULL == request)
- request = new HttpRequest(method, protocol, urlpath);
- else {
- request->initHTTP(method, protocol, urlpath);
- safe_free(request->canonical);
- }
+ setScheme(protocol, protoStr);
+ path(aUrlPath);
+ host(aHost);
+ userInfo(aLogin);
+ port(aPort);
+}
- request->SetHost(host);
- xstrncpy(request->login, login, MAX_LOGIN_SZ);
- request->port = (unsigned short) port;
- return request;
+/// Empties the cached absolute_, authorityHttp_ and authorityWithPort_
+/// strings. absolute() and authority() rebuild them when found empty.
+void
+URL::touch()
+{
+ authorityWithPort_.clear();
+ authorityHttp_.clear();
+ absolute_.clear();
}
-static HttpRequest *
-urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
+/// Returns the authority text of this URL.
+/// \param requirePort when true the result is always "host:port";
+///        when false the ":port" suffix appears only if the port
+///        differs from the scheme's default port.
+/// Both variants are cached in members and rebuilt after touch() clears them.
+SBuf &
+URL::authority(bool requirePort) const
{
- debugs(50, 5, "urnParse: " << urn);
- if (request) {
- request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
- safe_free(request->canonical);
- return request;
+ if (authorityHttp_.isEmpty()) {
+
+ // authorityHttp_ can legitimately stay empty (empty host with the
+ // default port), which sends us through this branch again on the
+ // next call. Start from a clean buffer so that repeated rebuilds
+ // do not keep appending to the previous authorityWithPort_ value.
+ authorityWithPort_.clear();
+
+ // both formats contain Host/IP
+ authorityWithPort_.append(host());
+ authorityHttp_ = authorityWithPort_;
+
+ // authorityHttp_ only has :port if it is non-default
+ authorityWithPort_.appendf(":%u",port());
+ if (port() != getScheme().defaultPort())
+ authorityHttp_ = authorityWithPort_;
}
- return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
+ return requirePort ? authorityWithPort_ : authorityHttp_;
}
-const char *
-urlCanonical(HttpRequest * request)
+/// Returns the absolute form of this URL: "scheme:" followed, for every
+/// scheme except URN, by "//", optional "userinfo@" and the authority,
+/// and finally the path. The string is built on first use and cached in
+/// absolute_ until touch() clears it.
+SBuf &
+URL::absolute() const
{
- LOCAL_ARRAY(char, portbuf, 32);
- LOCAL_ARRAY(char, urlbuf, MAX_URL);
-
- if (request->canonical)
- return request->canonical;
-
- if (request->protocol == AnyP::PROTO_URN) {
- snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
- SQUIDSTRINGPRINT(request->urlpath));
- } else if (request->method.id() == Http::METHOD_CONNECT) {
- snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
- } else {
- portbuf[0] = '\0';
-
- if (request->port != urlDefaultPort(request->protocol))
- snprintf(portbuf, 32, ":%d", request->port);
-
- const AnyP::UriScheme sch = request->protocol; // temporary, until bug 1961 URL handling is fixed.
- snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
- sch.c_str(),
- request->login,
- *request->login ? "@" : null_string,
- request->GetHost(),
- portbuf,
- SQUIDSTRINGPRINT(request->urlpath));
+ if (absolute_.isEmpty()) {
+ // TODO: most URL will be much shorter, avoid allocating this much
+ absolute_.reserveCapacity(MAX_URL);
+
+ absolute_.append(getScheme().image());
+ absolute_.append(":",1);
+ if (getScheme() != AnyP::PROTO_URN) {
+ absolute_.append("//", 2);
+ // userinfo is left out of http and https URLs (RFC 7230 section
+ // 2.7.1 deprecates it there) and whenever there is none to show.
+ // The HTTPS test must be "==": with "!=" the HTTP test is
+ // redundant and userinfo is dropped for every scheme but https.
+ const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
+ getScheme() == AnyP::PROTO_HTTPS ||
+ userInfo().isEmpty();
+ if (!omitUserInfo) {
+ absolute_.append(userInfo());
+ absolute_.append("@", 1);
+ }
+ absolute_.append(authority());
+ }
+ absolute_.append(path());
}
- return (request->canonical = xstrdup(urlbuf));
+ return absolute_;
}
-/** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
+/** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
* After copying it on in the first place! Would be less code to merge the two with a flag parameter.
* and never copy the query-string part in the first place
*/
urlCanonicalClean(const HttpRequest * request)
{
LOCAL_ARRAY(char, buf, MAX_URL);
- LOCAL_ARRAY(char, portbuf, 32);
- LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
- char *t;
-
- if (request->protocol == AnyP::PROTO_URN) {
- snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
- SQUIDSTRINGPRINT(request->urlpath));
- } else if (request->method.id() == Http::METHOD_CONNECT) {
- snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
- } else {
- portbuf[0] = '\0';
-
- if (request->port != urlDefaultPort(request->protocol))
- snprintf(portbuf, 32, ":%d", request->port);
-
- loginbuf[0] = '\0';
-
- if ((int) strlen(request->login) > 0) {
- strcpy(loginbuf, request->login);
- if ((t = strchr(loginbuf, ':')))
- *t = '\0';
+ snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
+ buf[sizeof(buf)-1] = '\0';
- strcat(loginbuf, "@");
+ // URN, CONNECT method, and non-stripped URIs can go straight out
+ if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
+ // strip anything AFTER a question-mark
+ // leaving the '?' in place
+ if (auto t = strchr(buf, '?')) {
+ *(++t) = '\0';
}
-
- const AnyP::UriScheme sch = request->protocol; // temporary, until bug 1961 URL handling is fixed.
- snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
- sch.c_str(),
- loginbuf,
- request->GetHost(),
- portbuf,
- SQUIDSTRINGPRINT(request->urlpath));
- /*
- * strip arguments AFTER a question-mark
- */
-
- if (Config.onoff.strip_query_terms)
- if ((t = strchr(buf, '?')))
- *(++t) = '\0';
}
if (stringHasCntl(buf))
LOCAL_ARRAY(char, buf, MAX_URL);
// method CONNECT and port HTTPS
- if (request->method == Http::METHOD_CONNECT && request->port == 443) {
- snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
+ if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
+ snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
return buf;
}
char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
- if (req->protocol == AnyP::PROTO_URN) {
- snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
- SQUIDSTRINGPRINT(req->urlpath));
+ if (req->url.getScheme() == AnyP::PROTO_URN) {
+ // XXX: this is what the original code did, but it seems to break the
+ // intended behaviour of this function. It returns the stored URN path,
+ // not converting the given one into a URN...
+ snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
return (urlbuf);
}
- size_t urllen;
-
- const AnyP::UriScheme sch = req->protocol; // temporary, until bug 1961 URL handling is fixed.
- if (req->port != urlDefaultPort(req->protocol)) {
- urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
- sch.c_str(),
- req->login,
- *req->login ? "@" : null_string,
- req->GetHost(),
- req->port
- );
- } else {
- urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
- sch.c_str(),
- req->login,
- *req->login ? "@" : null_string,
- req->GetHost()
- );
- }
-
+ SBuf authorityForm = req->url.authority(); // host[:port]
+ const SBuf &scheme = req->url.getScheme().image();
+ size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
+ SQUIDSBUFPRINT(scheme),
+ SQUIDSBUFPRINT(req->url.userInfo()),
+ !req->url.userInfo().isEmpty() ? "@" : "",
+ SQUIDSBUFPRINT(authorityForm));
+
+ // if the first char is '/' assume its a relative path
+ // XXX: this breaks on scheme-relative URLs,
+ // but we should not see those outside ESI, and rarely there.
+ // XXX: also breaks on any URL containing a '/' in the query-string portion
if (relUrl[0] == '/') {
- strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
} else {
- const char *path = req->urlpath.termedBuf();
- const char *last_slash = strrchr(path, '/');
+ SBuf path = req->url.path();
+ SBuf::size_type lastSlashPos = path.rfind('/');
- if (last_slash == NULL) {
+ if (lastSlashPos == SBuf::npos) {
+ // replace the whole path with the given bit(s)
urlbuf[urllen] = '/';
++urllen;
- strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
} else {
- ++last_slash;
- size_t pathlen = last_slash - path;
- if (pathlen > MAX_URL - urllen - 1) {
- pathlen = MAX_URL - urllen - 1;
+ // replace only the last (file?) segment with the given bit(s)
+ ++lastSlashPos;
+ if (lastSlashPos > MAX_URL - urllen - 1) {
+ // XXX: crops bits in the middle of the combined URL.
+ lastSlashPos = MAX_URL - urllen - 1;
}
- strncpy(&urlbuf[urllen], path, pathlen);
- urllen += pathlen;
+ SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
+ urllen += lastSlashPos;
if (urllen + 1 < MAX_URL) {
- strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
}
}
}
return (urlbuf);
}
-/*
- * matchDomainName() compares a hostname with a domainname according
- * to the following rules:
- *
- * HOST DOMAIN MATCH?
- * ------------- ------------- ------
- * foo.com foo.com YES
- * .foo.com foo.com YES
- * x.foo.com foo.com NO
- * foo.com .foo.com YES
- * .foo.com .foo.com YES
- * x.foo.com .foo.com YES
- *
- * We strip leading dots on hosts (but not domains!) so that
- * ".foo.com" is is always the same as "foo.com".
- *
- * Return values:
- * 0 means the host matches the domain
- * 1 means the host is greater than the domain
- * -1 means the host is less than the domain
- */
-
int
-matchDomainName(const char *h, const char *d)
+matchDomainName(const char *h, const char *d, uint flags)
{
int dl;
int hl;
+ const bool hostIncludesSubdomains = (*h == '.');
while ('.' == *h)
++h;
hl = strlen(h);
+ if (hl == 0)
+ return -1;
+
dl = strlen(d);
/*
* is a leading '.'.
*/
- if ('.' == d[0])
- return 0;
- else
+ if ('.' == d[0]) {
+ if (flags & mdnRejectSubsubDomains) {
+ // Check for sub-sub domain and reject
+ while(--hl >= 0 && h[hl] != '.');
+ if (hl < 0) {
+ // No sub-sub domain found, but reject if there is a
+ // leading dot in given host string (which is removed
+ // before the check is started).
+ return hostIncludesSubdomains ? 1 : 0;
+ } else
+ return 1; // sub-sub domain, reject
+ } else
+ return 0;
+ } else
return 1;
}
}
/*
* We found different characters in the same position (from the end).
*/
+
+ // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
+ // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
+ // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
+ if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
+ return 0;
+
/*
* If one of those character is '.' then its special. In order
* for splay tree sorting to work properly, "x-foo.com" must
// we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
// we also support forwarding OPTIONS and TRACE, except for the *-URI ones
if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
- return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->urlpath != "*");
+ return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
if (r->method == Http::METHOD_PURGE)
return 1;
/* does method match the protocol? */
- switch (r->protocol) {
+ switch (r->url.getScheme()) {
case AnyP::PROTO_URN:
break;
case AnyP::PROTO_HTTPS:
-#if USE_SSL
-
+#if USE_OPENSSL
+ rc = 1;
+#elif USE_GNUTLS
rc = 1;
-
- break;
-
#else
/*
* Squid can't originate an SSL connection, so it should
* CONNECT instead.
*/
rc = 0;
-
#endif
+ break;
default:
break;
return Host;
}
+
+/// Builds a URL carrying the given scheme, with an empty host string,
+/// the numeric-host flag cleared, and port 0.
+URL::URL(AnyP::UriScheme const &aScheme) :
+ scheme_(aScheme),
+ hostIsNumeric_(false),
+ port_(0)
+{
+ host_[0] = '\0';
+}
+