Docs: Copyright updates for 2018 (#114)

[thirdparty/squid.git] / src / url.cc
diff --git a/src/url.cc b/src/url.cc

index 0268467f8e4a009e2799ad8fc33e9819ed689e29..44b61522a2a6abc2769b2079c2abd12f87596424 100644 (file)
--- a/src/url.cc
+++ b/src/url.cc
@@ -1,36 +1,13 @@
-
  /*
- * DEBUG: section 23    URL Parsing
- * AUTHOR: Duane Wessels
- *
- * SQUID Web Proxy Cache          http://www.squid-cache.org/
- * ----------------------------------------------------------
- *
- *  Squid is the result of efforts by numerous individuals from
- *  the Internet community; see the CONTRIBUTORS file for full
- *  details.   Many organizations have provided support for Squid's
- *  development; see the SPONSORS file for full details.  Squid is
- *  Copyrighted (C) 2001 by the Regents of the University of
- *  California; see the COPYRIGHT file for full details.  Squid
- *  incorporates software developed and/or copyrighted by other
- *  sources; see the CREDITS file for full details.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
+ * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
   *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
   */
  
+/* DEBUG: section 23    URL Parsing */
+
  #include "squid.h"
  #include "globals.h"
  #include "HttpRequest.h"
@@ -39,14 +16,6 @@
  #include "SquidString.h"
  #include "URL.h"
  
-static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
-                                   const AnyP::ProtocolType protocol,
-                                   const char *const urlpath,
-                                   const char *const host,
-                                   const char *const login,
-                                   const int port,
-                                   HttpRequest *request);
-static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
  static const char valid_hostname_chars_u[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
@@ -60,6 +29,48 @@ static const char valid_hostname_chars[] =
      "[:]"
      ;
  
+const SBuf &
+URL::Asterisk() // shared "*" image, compared against OPTIONS/TRACE asterisk targets
+{
+    static SBuf asteriskImage("*"); // function-local static: built on first use
+    return asteriskImage;
+}
+
+const SBuf &
+URL::SlashPath() // shared "/" image, returned by path() for an empty HTTP(S) path
+{
+    static SBuf rootPath("/"); // function-local static: built on first use
+    return rootPath;
+}
+
+void
+URL::host(const char *src) // set the host from src, recording whether it is an IP
+{
+    hostAddr_.setEmpty();
+    hostAddr_ = src; // presumably parses src as an IP address -- TODO confirm
+    if (hostAddr_.isAnyAddr()) {
+        xstrncpy(host_, src, sizeof(host_)); // no address recognised: keep src as a name
+        hostIsNumeric_ = false;
+    } else {
+        hostAddr_.toHostStr(host_, sizeof(host_)); // keep the address's host-string form
+        debugs(23, 3, "given IP: " << hostAddr_);
+        hostIsNumeric_ = true;
+    }
+    touch(); // cached absolute/authority strings were built from the old host
+}
+}
+
+const SBuf &
+URL::path() const
+{
+    // An empty path is valid URI syntax (RFC 3986 section 3.3, path-abempty) and must
+    // be accepted as input, but RFC 7230 sections 2.7.3, 5.3.1 and 5.7.2 say it cannot
+    // be empty when sending and using, so HTTP and HTTPS URLs default to "/".
+    const bool isHttpFamily = (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS);
+    if (path_.isEmpty() && isHttpFamily)
+        return SlashPath();
+    return path_;
+}
+
  void
  urlInitialize(void)
  {
@@ -77,6 +88,7 @@ urlInitialize(void)
      assert(0 == matchDomainName("foo.com", ".foo.com"));
      assert(0 == matchDomainName(".foo.com", ".foo.com"));
      assert(0 == matchDomainName("x.foo.com", ".foo.com"));
+    assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
      assert(0 != matchDomainName("x.foo.com", "foo.com"));
      assert(0 != matchDomainName("foo.com", "x.foo.com"));
      assert(0 != matchDomainName("bar.com", "foo.com"));
@@ -89,26 +101,29 @@ urlInitialize(void)
      assert(0 < matchDomainName("bfoo.com", "afoo.com"));
      assert(0 > matchDomainName("afoo.com", "bfoo.com"));
      assert(0 < matchDomainName("x-foo.com", ".foo.com"));
+
+    assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
+    assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+    assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+    assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
+
+    assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
+    assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
+    assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
+    assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
+
      /* more cases? */
  }
  
  /**
- * urlParseProtocol() takes begin (b) and end (e) pointers, but for
- * backwards compatibility, e defaults to NULL, in which case we
- * assume b is NULL-terminated.
+ * Parse the scheme name from string b, into protocol type.
+ * The string must be 0-terminated.
   */
  AnyP::ProtocolType
-urlParseProtocol(const char *b, const char *e)
+urlParseProtocol(const char *b)
  {
-    /*
-     * if e is NULL, b must be NULL terminated and we
-     * make e point to the first whitespace character
-     * after b.
-     */
-
-    if (NULL == e)
-        e = b + strcspn(b, ":");
-
+    // make e point to the ':' character
+    const char *e = b + strcspn(b, ":");
      int len = e - b;
  
      /* test common stuff first */
@@ -146,56 +161,16 @@ urlParseProtocol(const char *b, const char *e)
      if (strncasecmp(b, "whois", len) == 0)
          return AnyP::PROTO_WHOIS;
  
-    if (strncasecmp(b, "internal", len) == 0)
-        return AnyP::PROTO_INTERNAL;
+    if (len > 0)
+        return AnyP::PROTO_UNKNOWN;
  
      return AnyP::PROTO_NONE;
  }
  
-int
-urlDefaultPort(AnyP::ProtocolType p)
-{
-    switch (p) {
-
-    case AnyP::PROTO_HTTP:
-        return 80;
-
-    case AnyP::PROTO_HTTPS:
-        return 443;
-
-    case AnyP::PROTO_FTP:
-        return 21;
-
-    case AnyP::PROTO_COAP:
-    case AnyP::PROTO_COAPS:
-        // coaps:// default is TBA as of draft-ietf-core-coap-08.
-        // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
-        return 5683;
-
-    case AnyP::PROTO_GOPHER:
-        return 70;
-
-    case AnyP::PROTO_WAIS:
-        return 210;
-
-    case AnyP::PROTO_CACHE_OBJECT:
-
-    case AnyP::PROTO_INTERNAL:
-        return CACHE_HTTP_PORT;
-
-    case AnyP::PROTO_WHOIS:
-        return 43;
-
-    default:
-        return 0;
-    }
-}
-
  /*
   * Parse a URI/URL.
   *
- * If the 'request' arg is non-NULL, put parsed values there instead
- * of allocating a new HttpRequest.
+ * Stores parsed values in the `request` argument.
   *
   * This abuses HttpRequest as a way of representing the parsed url
   * and its components.
@@ -212,43 +187,52 @@ urlDefaultPort(AnyP::ProtocolType p)
   * its partial or not (ie, it handles the case of no trailing slash as
   * being "end of host with implied path of /".
   */
-HttpRequest *
-urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
+bool
+URL::parse(const HttpRequestMethod& method, const char *url)
  {
      LOCAL_ARRAY(char, proto, MAX_URL);
      LOCAL_ARRAY(char, login, MAX_URL);
-    LOCAL_ARRAY(char, host, MAX_URL);
+    LOCAL_ARRAY(char, foundHost, MAX_URL);
      LOCAL_ARRAY(char, urlpath, MAX_URL);
      char *t = NULL;
      char *q = NULL;
-    int port;
+    int foundPort;
      AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
      int l;
      int i;
      const char *src;
      char *dst;
-    proto[0] = host[0] = urlpath[0] = login[0] = '\0';
+    proto[0] = foundHost[0] = urlpath[0] = login[0] = '\0';
  
      if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
-        /* terminate so it doesn't overflow other buffers */
-        *(url + (MAX_URL >> 1)) = '\0';
-        debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
-        return NULL;
+        debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
+        return false;
      }
      if (method == Http::METHOD_CONNECT) {
-        port = CONNECT_PORT;
+        /*
+         * RFC 7230 section 5.3.3:  authority-form = authority
+         *  "excluding any userinfo and its "@" delimiter"
+         *
+         * RFC 3986 section 3.2:    authority = [ userinfo "@" ] host [ ":" port ]
+         *
+         * As an HTTP(S) proxy we assume HTTPS (443) if no port provided.
+         */
+        foundPort = 443;
  
-        if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
-            if (sscanf(url, "%[^:]:%d", host, &port) < 1)
-                return NULL;
+        if (sscanf(url, "[%[^]]]:%d", foundHost, &foundPort) < 1)
+            if (sscanf(url, "%[^:]:%d", foundHost, &foundPort) < 1)
+                return false;
  
      } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
-               strcmp(url, "*") == 0) {
-        protocol = AnyP::PROTO_HTTP;
-        port = urlDefaultPort(protocol);
-        return urlParseFinish(method, protocol, url, host, login, port, request);
-    } else if (!strncmp(url, "urn:", 4)) {
-        return urnParse(method, url, request);
+               URL::Asterisk().cmp(url) == 0) {
+        parseFinish(AnyP::PROTO_HTTP, nullptr, url, foundHost, SBuf(), 80 /* HTTP default port */);
+        return true;
+    } else if (strncmp(url, "urn:", 4) == 0) {
+        debugs(23, 3, "Split URI '" << url << "' into proto='urn', path='" << (url+4) << "'");
+        debugs(50, 5, "urn=" << (url+4));
+        setScheme(AnyP::PROTO_URN, nullptr);
+        path(url + 4);
+        return true;
      } else {
          /* Parse the URL: */
          src = url;
@@ -258,12 +242,12 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
              *dst = *src;
          }
          if (i >= l)
-            return NULL;
+            return false;
          *dst = '\0';
  
          /* Then its :// */
          if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
-            return NULL;
+            return false;
          i += 3;
          src += 3;
  
@@ -271,7 +255,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          // bug 1881: If we don't get a "/" then we imply it was there
          // bug 3074: We could just be given a "?" or "#". These also imply "/"
          // bug 3233: whitespace is also a hostname delimiter.
-        for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
+        for (dst = foundHost; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
              *dst = *src;
          }
  
@@ -281,7 +265,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
           * been -given- a valid URL and the path is just '/'.
           */
          if (i > l)
-            return NULL;
+            return false;
          *dst = '\0';
  
          // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
@@ -298,7 +282,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
  
          /* We -could- be at the end of the buffer here */
          if (i > l)
-            return NULL;
+            return false;
          /* If the URL path is empty we set it to be "/" */
          if (dst == urlpath) {
              *dst = '/';
@@ -307,27 +291,29 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          *dst = '\0';
  
          protocol = urlParseProtocol(proto);
-        port = urlDefaultPort(protocol);
+        foundPort = AnyP::UriScheme(protocol).defaultPort();
  
          /* Is there any login information? (we should eventually parse it above) */
-        t = strrchr(host, '@');
+        t = strrchr(foundHost, '@');
          if (t != NULL) {
-            strncpy((char *) login, (char *) host, sizeof(login)-1);
+            strncpy((char *) login, (char *) foundHost, sizeof(login)-1);
              login[sizeof(login)-1] = '\0';
              t = strrchr(login, '@');
              *t = 0;
-            strncpy((char *) host, t + 1, sizeof(host)-1);
-            host[sizeof(host)-1] = '\0';
+            strncpy((char *) foundHost, t + 1, sizeof(foundHost)-1);
+            foundHost[sizeof(foundHost)-1] = '\0';
+            // Bug 4498: URL-unescape the login info after extraction
+            rfc1738_unescape(login);
          }
  
          /* Is there any host information? (we should eventually parse it above) */
-        if (*host == '[') {
+        if (*foundHost == '[') {
              /* strip any IPA brackets. valid under IPv6. */
-            dst = host;
+            dst = foundHost;
              /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
-            src = host;
+            src = foundHost;
              ++src;
-            l = strlen(host);
+            l = strlen(foundHost);
              i = 1;
              for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
                  *dst = *src;
@@ -342,9 +328,9 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
                  ++dst;
              t = dst;
          } else {
-            t = strrchr(host, ':');
+            t = strrchr(foundHost, ':');
  
-            if (t != strchr(host,':') ) {
+            if (t != strchr(foundHost,':') ) {
                  /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
                  /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
                  /* therefore we MUST accept the case where they are not bracketed at all. */
@@ -353,24 +339,24 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          }
  
          // Bug 3183 sanity check: If scheme is present, host must be too.
-        if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
+        if (protocol != AnyP::PROTO_NONE && foundHost[0] == '\0') {
              debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
-            return NULL;
+            return false;
          }
  
          if (t && *t == ':') {
              *t = '\0';
              ++t;
-            port = atoi(t);
+            foundPort = atoi(t);
          }
      }
  
-    for (t = host; *t; ++t)
+    for (t = foundHost; *t; ++t)
          *t = xtolower(*t);
  
-    if (stringHasWhitespace(host)) {
+    if (stringHasWhitespace(foundHost)) {
          if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
-            t = q = host;
+            t = q = foundHost;
              while (*t) {
                  if (!xisspace(*t)) {
                      *q = *t;
@@ -382,48 +368,49 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          }
      }
  
-    debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+    debugs(23, 3, "Split URL '" << url << "' into proto='" << proto << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
  
-    if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
-        debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
-        return NULL;
+    if (Config.onoff.check_hostnames &&
+            strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
+        debugs(23, DBG_IMPORTANT, MYNAME << "Illegal character in hostname '" << foundHost << "'");
+        return false;
      }
  
      /* For IPV6 addresses also check for a colon */
-    if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
-        strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
+    if (Config.appendDomain && !strchr(foundHost, '.') && !strchr(foundHost, ':'))
+        strncat(foundHost, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(foundHost) - 1);
  
      /* remove trailing dots from hostnames */
-    while ((l = strlen(host)) > 0 && host[--l] == '.')
-        host[l] = '\0';
+    while ((l = strlen(foundHost)) > 0 && foundHost[--l] == '.')
+        foundHost[l] = '\0';
  
      /* reject duplicate or leading dots */
-    if (strstr(host, "..") || *host == '.') {
-        debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
-        return NULL;
+    if (strstr(foundHost, "..") || *foundHost == '.') {
+        debugs(23, DBG_IMPORTANT, MYNAME << "Illegal hostname '" << foundHost << "'");
+        return false;
      }
  
-    if (port < 1 || port > 65535) {
-        debugs(23, 3, "urlParse: Invalid port '" << port << "'");
-        return NULL;
+    if (foundPort < 1 || foundPort > 65535) {
+        debugs(23, 3, "Invalid port '" << foundPort << "'");
+        return false;
      }
  
  #if HARDCODE_DENY_PORTS
      /* These ports are filtered in the default squid.conf, but
       * maybe someone wants them hardcoded... */
-    if (port == 7 || port == 9 || port == 19) {
-        debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
-        return NULL;
+    if (foundPort == 7 || foundPort == 9 || foundPort == 19) {
+        debugs(23, DBG_CRITICAL, MYNAME << "Deny access to port " << foundPort);
+        return false;
      }
  #endif
  
      if (stringHasWhitespace(urlpath)) {
-        debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+        debugs(23, 2, "URI has whitespace: {" << url << "}");
  
          switch (Config.uri_whitespace) {
  
          case URI_WHITESPACE_DENY:
-            return NULL;
+            return false;
  
          case URI_WHITESPACE_ALLOW:
              break;
@@ -451,83 +438,79 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          }
      }
  
-    return urlParseFinish(method, protocol, urlpath, host, login, port, request);
+    parseFinish(protocol, proto, urlpath, foundHost, SBuf(login), foundPort);
+    return true;
  }
  
-/**
- * Update request with parsed URI data.  If the request arg is
- * non-NULL, put parsed values there instead of allocating a new
- * HttpRequest.
- */
-static HttpRequest *
-urlParseFinish(const HttpRequestMethod& method,
-               const AnyP::ProtocolType protocol,
-               const char *const urlpath,
-               const char *const host,
-               const char *const login,
-               const int port,
-               HttpRequest *request)
+/// Update the URL object with parsed URI data: scheme, path, host, userinfo and port.
+void
+URL::parseFinish(const AnyP::ProtocolType protocol,
+                 const char *const protoStr, // for unknown protocols; parse() may pass nullptr
+                 const char *const aUrlPath,
+                 const char *const aHost,
+                 const SBuf &aLogin, // userinfo; parse() passes an empty SBuf when there is none
+                 const int aPort)
  {
-    if (NULL == request)
-        request = new HttpRequest(method, protocol, urlpath);
-    else {
-        request->initHTTP(method, protocol, urlpath);
-        safe_free(request->canonical);
-    }
+    setScheme(protocol, protoStr);
+    path(aUrlPath);
+    host(aHost); // host() also calls touch(), dropping the cached URL strings
+    userInfo(aLogin);
+    port(aPort);
+}
  
-    request->SetHost(host);
-    xstrncpy(request->login, login, MAX_LOGIN_SZ);
-    request->port = (unsigned short) port;
-    return request;
+void
+URL::touch() // forget the cached renderings so they are rebuilt on next use
+{
+    absolute_.clear(); // absolute() refills it when it finds it empty
+    authorityHttp_.clear(); // authority() refills both authority strings when this is empty
+    authorityWithPort_.clear();
  }
  
-static HttpRequest *
-urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
+SBuf &
+URL::authority(bool requirePort) const // host[:port]; ":port" is always present if requirePort
  {
-    debugs(50, 5, "urnParse: " << urn);
-    if (request) {
-        request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
-        safe_free(request->canonical);
-        return request;
+    if (authorityHttp_.isEmpty()) {
+        authorityWithPort_.clear(); // an empty authorityHttp_ re-enters here: do not accumulate
+        // both formats contain Host/IP
+        authorityWithPort_.append(host());
+        authorityHttp_ = authorityWithPort_;
+
+        // authorityHttp_ only has :port if it is non-default
+        authorityWithPort_.appendf(":%u",port());
+        if (port() != getScheme().defaultPort())
+            authorityHttp_ = authorityWithPort_;
      }
  
-    return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
+    return requirePort ? authorityWithPort_ : authorityHttp_;
  }
  
-const char *
-urlCanonical(HttpRequest * request)
+SBuf &
+URL::absolute() const // absolute-form URL text, built on first use and cached in absolute_
  {
-    LOCAL_ARRAY(char, portbuf, 32);
-    LOCAL_ARRAY(char, urlbuf, MAX_URL);
-
-    if (request->canonical)
-        return request->canonical;
-
-    if (request->protocol == AnyP::PROTO_URN) {
-        snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
-                 SQUIDSTRINGPRINT(request->urlpath));
-    } else if (request->method.id() == Http::METHOD_CONNECT) {
-        snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
-    } else {
-        portbuf[0] = '\0';
-
-        if (request->port != urlDefaultPort(request->protocol))
-            snprintf(portbuf, 32, ":%d", request->port);
-
-        const AnyP::UriScheme sch = request->protocol; // temporary, until bug 1961 URL handling is fixed.
-        snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
-                 sch.c_str(),
-                 request->login,
-                 *request->login ? "@" : null_string,
-                 request->GetHost(),
-                 portbuf,
-                 SQUIDSTRINGPRINT(request->urlpath));
+    if (absolute_.isEmpty()) {
+        // TODO: most URLs will be much shorter, avoid allocating this much
+        absolute_.reserveCapacity(MAX_URL);
+
+        absolute_.append(getScheme().image());
+        absolute_.append(":",1);
+        if (getScheme() != AnyP::PROTO_URN) { // a URN is just "scheme:path", with no authority
+            absolute_.append("//", 2);
+            const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
+                                      getScheme() == AnyP::PROTO_HTTPS || // no userinfo in HTTP(S) URLs
+                                      userInfo().isEmpty();
+            if (!omitUserInfo) {
+                absolute_.append(userInfo());
+                absolute_.append("@", 1);
+            }
+            absolute_.append(authority()); // host, plus ":port" when the port is non-default
+        }
+        absolute_.append(path());
      }
  
-    return (request->canonical = xstrdup(urlbuf));
+    return absolute_;
  }
  
-/** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
+/** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
   *        After copying it on in the first place! Would be less code to merge the two with a flag parameter.
   *        and never copy the query-string part in the first place
   */
@@ -535,46 +518,17 @@ char *
  urlCanonicalClean(const HttpRequest * request)
  {
      LOCAL_ARRAY(char, buf, MAX_URL);
-    LOCAL_ARRAY(char, portbuf, 32);
-    LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
-    char *t;
-
-    if (request->protocol == AnyP::PROTO_URN) {
-        snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
-                 SQUIDSTRINGPRINT(request->urlpath));
-    } else if (request->method.id() == Http::METHOD_CONNECT) {
-        snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
-    } else {
-        portbuf[0] = '\0';
-
-        if (request->port != urlDefaultPort(request->protocol))
-            snprintf(portbuf, 32, ":%d", request->port);
-
-        loginbuf[0] = '\0';
-
-        if ((int) strlen(request->login) > 0) {
-            strcpy(loginbuf, request->login);
  
-            if ((t = strchr(loginbuf, ':')))
-                *t = '\0';
+    snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
+    buf[sizeof(buf)-1] = '\0';
  
-            strcat(loginbuf, "@");
+    // URN, CONNECT method, and non-stripped URIs can go straight out
+    if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
+        // strip anything AFTER a question-mark
+        // leaving the '?' in place
+        if (auto t = strchr(buf, '?')) {
+            *(++t) = '\0';
          }
-
-        const AnyP::UriScheme sch = request->protocol; // temporary, until bug 1961 URL handling is fixed.
-        snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
-                 sch.c_str(),
-                 loginbuf,
-                 request->GetHost(),
-                 portbuf,
-                 SQUIDSTRINGPRINT(request->urlpath));
-        /*
-         * strip arguments AFTER a question-mark
-         */
-
-        if (Config.onoff.strip_query_terms)
-            if ((t = strchr(buf, '?')))
-                *(++t) = '\0';
      }
  
      if (stringHasCntl(buf))
@@ -595,8 +549,8 @@ urlCanonicalFakeHttps(const HttpRequest * request)
      LOCAL_ARRAY(char, buf, MAX_URL);
  
      // method CONNECT and port HTTPS
-    if (request->method == Http::METHOD_CONNECT && request->port == 443) {
-        snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
+    if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
+        snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
          return buf;
      }
  
@@ -653,52 +607,48 @@ urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
  
      char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
  
-    if (req->protocol == AnyP::PROTO_URN) {
-        snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
-                 SQUIDSTRINGPRINT(req->urlpath));
+    if (req->url.getScheme() == AnyP::PROTO_URN) {
+        // XXX: this is what the original code did, but it seems to break the
+        // intended behaviour of this function. It returns the stored URN path,
+        // not converting the given one into a URN...
+        snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
          return (urlbuf);
      }
  
-    size_t urllen;
-
-    const AnyP::UriScheme sch = req->protocol; // temporary, until bug 1961 URL handling is fixed.
-    if (req->port != urlDefaultPort(req->protocol)) {
-        urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
-                          sch.c_str(),
-                          req->login,
-                          *req->login ? "@" : null_string,
-                          req->GetHost(),
-                          req->port
-                         );
-    } else {
-        urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
-                          sch.c_str(),
-                          req->login,
-                          *req->login ? "@" : null_string,
-                          req->GetHost()
-                         );
-    }
-
+    SBuf authorityForm = req->url.authority(); // host[:port]
+    const SBuf &scheme = req->url.getScheme().image();
+    size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
+                             SQUIDSBUFPRINT(scheme),
+                             SQUIDSBUFPRINT(req->url.userInfo()),
+                             !req->url.userInfo().isEmpty() ? "@" : "",
+                             SQUIDSBUFPRINT(authorityForm));
+
+    // if the first char is '/' assume it's a relative path
+    // XXX: this breaks on scheme-relative URLs,
+    // but we should not see those outside ESI, and rarely there.
+    // XXX: also breaks on any URL containing a '/' in the query-string portion
      if (relUrl[0] == '/') {
-        strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+        xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
      } else {
-        const char *path = req->urlpath.termedBuf();
-        const char *last_slash = strrchr(path, '/');
+        SBuf path = req->url.path();
+        SBuf::size_type lastSlashPos = path.rfind('/');
  
-        if (last_slash == NULL) {
+        if (lastSlashPos == SBuf::npos) {
+            // replace the whole path with the given bit(s)
              urlbuf[urllen] = '/';
              ++urllen;
-            strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+            xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
          } else {
-            ++last_slash;
-            size_t pathlen = last_slash - path;
-            if (pathlen > MAX_URL - urllen - 1) {
-                pathlen = MAX_URL - urllen - 1;
+            // replace only the last (file?) segment with the given bit(s)
+            ++lastSlashPos;
+            if (lastSlashPos > MAX_URL - urllen - 1) {
+                // XXX: crops bits in the middle of the combined URL.
+                lastSlashPos = MAX_URL - urllen - 1;
              }
-            strncpy(&urlbuf[urllen], path, pathlen);
-            urllen += pathlen;
+            SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
+            urllen += lastSlashPos;
              if (urllen + 1 < MAX_URL) {
-                strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+                xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
              }
          }
      }
@@ -706,39 +656,21 @@ urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
      return (urlbuf);
  }
  
-/*
- * matchDomainName() compares a hostname with a domainname according
- * to the following rules:
- *
- *    HOST          DOMAIN        MATCH?
- * ------------- -------------    ------
- *    foo.com       foo.com         YES
- *   .foo.com       foo.com         YES
- *  x.foo.com       foo.com          NO
- *    foo.com      .foo.com         YES
- *   .foo.com      .foo.com         YES
- *  x.foo.com      .foo.com         YES
- *
- *  We strip leading dots on hosts (but not domains!) so that
- *  ".foo.com" is is always the same as "foo.com".
- *
- *  Return values:
- *     0 means the host matches the domain
- *     1 means the host is greater than the domain
- *    -1 means the host is less than the domain
- */
-
  int
-matchDomainName(const char *h, const char *d)
+matchDomainName(const char *h, const char *d, uint flags)
  {
      int dl;
      int hl;
  
+    const bool hostIncludesSubdomains = (*h == '.');
      while ('.' == *h)
          ++h;
  
      hl = strlen(h);
  
+    if (hl == 0)
+        return -1;
+
      dl = strlen(d);
  
      /*
@@ -776,9 +708,20 @@ matchDomainName(const char *h, const char *d)
               * is a leading '.'.
               */
  
-            if ('.' == d[0])
-                return 0;
-            else
+            if ('.' == d[0]) {
+                if (flags & mdnRejectSubsubDomains) {
+                    // Check for sub-sub domain and reject
+                    while(--hl >= 0 && h[hl] != '.');
+                    if (hl < 0) {
+                        // No sub-sub domain found, but reject if there is a
+                        // leading dot in given host string (which is removed
+                        // before the check is started).
+                        return hostIncludesSubdomains ? 1 : 0;
+                    } else
+                        return 1; // sub-sub domain, reject
+                } else
+                    return 0;
+            } else
                  return 1;
          }
      }
@@ -786,6 +729,13 @@ matchDomainName(const char *h, const char *d)
      /*
       * We found different characters in the same position (from the end).
       */
+
+    // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
+    // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
+    // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
+    if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
+        return 0;
+
      /*
       * If one of those character is '.' then its special.  In order
       * for splay tree sorting to work properly, "x-foo.com" must
@@ -824,13 +774,13 @@ urlCheckRequest(const HttpRequest * r)
      // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
      // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
      if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
-        return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->urlpath != "*");
+        return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
  
      if (r->method == Http::METHOD_PURGE)
          return 1;
  
      /* does method match the protocol? */
-    switch (r->protocol) {
+    switch (r->url.getScheme()) {
  
      case AnyP::PROTO_URN:
  
@@ -858,12 +808,10 @@ urlCheckRequest(const HttpRequest * r)
          break;
  
      case AnyP::PROTO_HTTPS:
-#if USE_SSL
-
+#if USE_OPENSSL
+        rc = 1;
+#elif USE_GNUTLS
          rc = 1;
-
-        break;
-
  #else
          /*
          * Squid can't originate an SSL connection, so it should
@@ -871,8 +819,8 @@ urlCheckRequest(const HttpRequest * r)
          * CONNECT instead.
          */
          rc = 0;
-
  #endif
+        break;
  
      default:
          break;
@@ -979,3 +927,12 @@ URLHostName::extract(char const *aUrl)
  
      return Host;
  }
+
+URL::URL(const AnyP::UriScheme &aScheme) : // start with the given scheme, no host, port 0
+    scheme_(aScheme),
+    hostIsNumeric_(false),
+    port_(0)
+{
+    host_[0] = '\0'; // host_ begins as an empty C string
+}
+