/*
 * $Id$
 *
 * DEBUG: section 23    URL Parsing
 * AUTHOR: Duane Wessels
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */
-#include "squid.h"
+#include "URL.h"
+#include "HttpRequest.h"
+#include "URLScheme.h"
-const char *RequestMethodStr[] =
-{
- "NONE",
- "GET",
- "POST",
- "PUT",
- "HEAD",
- "CONNECT",
- "TRACE",
- "PURGE",
- "OPTIONS",
-#ifndef RFC_2518
- "PROPFIND",
- "PROPPATCH",
- "MKCOL",
- "COPY",
- "MOVE",
- "LOCK",
- "UNLOCK",
+static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn);
+static const char valid_hostname_chars_u[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-._"
+#if USE_IPV6
+ "[:]"
#endif
- "ERROR"
-};
-
-const char *ProtocolStr[] =
-{
- "NONE",
- "http",
- "ftp",
- "gopher",
- "wais",
- "cache_object",
- "icp",
-#if USE_HTCP
- "htcp",
+ ;
+static const char valid_hostname_chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-."
+#if USE_IPV6
+ "[:]"
#endif
- "urn",
- "whois",
- "internal",
- "https",
- "TOTAL"
-};
-
-static request_t *urnParse(method_t method, char *urn);
-static const char *const valid_hostname_chars =
-#if ALLOW_HOSTNAME_UNDERSCORES
-"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-"abcdefghijklmnopqrstuvwxyz"
-"0123456789-._";
-#else
-"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-"abcdefghijklmnopqrstuvwxyz"
-"0123456789-.";
-#endif
-
-/* convert %xx in url string to a character
- * Allocate a new string and return a pointer to converted string */
-
-char *
-url_convert_hex(char *org_url, int allocate)
-{
- static char code[] = "00";
- char *url = NULL;
- char *s = NULL;
- char *t = NULL;
- url = allocate ? (char *) xstrdup(org_url) : org_url;
- if ((int) strlen(url) < 3 || !strchr(url, '%'))
- return url;
- for (s = t = url; *(s + 2); s++) {
- if (*s == '%') {
- code[0] = *(++s);
- code[1] = *(++s);
- *t++ = (char) strtol(code, NULL, 16);
- } else {
- *t++ = *s;
- }
- }
- do {
- *t++ = *s;
- } while (*s++);
- return url;
-}
+ ;
/*
 * One-time module initialization: sanity-check the protocol string
 * table and self-test matchDomainName().
 */
void
urlInitialize(void)
{
    debugs(23, 5, "urlInitialize: Initializing...");
    /* this ensures that the number of protocol strings is the same as
     * the enum slots allocated because the last enum is always 'TOTAL'.
     */
    assert(strcmp(ProtocolStr[PROTO_MAX], "TOTAL") == 0);
    /*
     * These test that our matchDomainName() function works the
     * way we expect it to.
     */
    assert(0 == matchDomainName("foo.com", "foo.com"));
    assert(0 == matchDomainName(".foo.com", "foo.com"));
    assert(0 == matchDomainName("foo.com", ".foo.com"));
    assert(0 == matchDomainName(".foo.com", ".foo.com"));
    assert(0 == matchDomainName("x.foo.com", ".foo.com"));
    assert(0 != matchDomainName("x.foo.com", "foo.com"));
    assert(0 != matchDomainName("foo.com", "x.foo.com"));
    assert(0 != matchDomainName("bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", ".foo.com"));
    assert(0 != matchDomainName("bar.com", ".foo.com"));
    assert(0 < matchDomainName("zzz.com", "foo.com"));
    assert(0 > matchDomainName("aaa.com", "foo.com"));
    assert(0 == matchDomainName("FOO.com", "foo.COM"));
    assert(0 < matchDomainName("bfoo.com", "afoo.com"));
    assert(0 > matchDomainName("afoo.com", "bfoo.com"));
    assert(0 < matchDomainName("x-foo.com", ".foo.com"));
    /* more cases? */
}
-method_t
-urlParseMethod(const char *s)
+/**
+ * urlParseProtocol() takes begin (b) and end (e) pointers, but for
+ * backwards compatibility, e defaults to NULL, in which case we
+ * assume b is NULL-terminated.
+ */
+protocol_t
+urlParseProtocol(const char *b, const char *e)
{
- if (strcasecmp(s, "GET") == 0) {
- return METHOD_GET;
- } else if (strcasecmp(s, "POST") == 0) {
- return METHOD_POST;
- } else if (strcasecmp(s, "PUT") == 0) {
- return METHOD_PUT;
- } else if (strcasecmp(s, "HEAD") == 0) {
- return METHOD_HEAD;
- } else if (strcasecmp(s, "CONNECT") == 0) {
- return METHOD_CONNECT;
- } else if (strcasecmp(s, "TRACE") == 0) {
- return METHOD_TRACE;
- } else if (strcasecmp(s, "PURGE") == 0) {
- return METHOD_PURGE;
- } else if (strcasecmp(s, "OPTIONS") == 0) {
- return METHOD_OPTIONS;
-#ifndef RFC_2518
- } else if (strcasecmp(s, "PROPFIND") == 0) {
- return METHOD_PROPFIND;
- } else if (strcasecmp(s, "PROPPATCH") == 0) {
- return METHOD_PROPPATCH;
- } else if (strcasecmp(s, "MKCOL") == 0) {
- return METHOD_MKCOL;
- } else if (strcasecmp(s, "COPY") == 0) {
- return METHOD_COPY;
- } else if (strcasecmp(s, "MOVE") == 0) {
- return METHOD_MOVE;
- } else if (strcasecmp(s, "LOCK") == 0) {
- return METHOD_LOCK;
- } else if (strcasecmp(s, "UNLOCK") == 0) {
- return METHOD_UNLOCK;
-#endif
- }
- return METHOD_NONE;
-}
+ /*
+ * if e is NULL, b must be NULL terminated and we
+ * make e point to the first whitespace character
+ * after b.
+ */
+ if (NULL == e)
+ e = b + strcspn(b, ":");
+
+ int len = e - b;
-protocol_t
-urlParseProtocol(const char *s)
-{
/* test common stuff first */
- if (strcasecmp(s, "http") == 0)
- return PROTO_HTTP;
- if (strcasecmp(s, "ftp") == 0)
- return PROTO_FTP;
- if (strcasecmp(s, "https") == 0)
- return PROTO_HTTPS;
- if (strcasecmp(s, "file") == 0)
- return PROTO_FTP;
- if (strcasecmp(s, "gopher") == 0)
- return PROTO_GOPHER;
- if (strcasecmp(s, "wais") == 0)
- return PROTO_WAIS;
- if (strcasecmp(s, "cache_object") == 0)
- return PROTO_CACHEOBJ;
- if (strcasecmp(s, "urn") == 0)
- return PROTO_URN;
- if (strcasecmp(s, "whois") == 0)
- return PROTO_WHOIS;
- if (strcasecmp(s, "internal") == 0)
- return PROTO_INTERNAL;
+
+ if (strncasecmp(b, "http", len) == 0)
+ return PROTO_HTTP;
+
+ if (strncasecmp(b, "ftp", len) == 0)
+ return PROTO_FTP;
+
+ if (strncasecmp(b, "https", len) == 0)
+ return PROTO_HTTPS;
+
+ if (strncasecmp(b, "file", len) == 0)
+ return PROTO_FTP;
+
+ if (strncasecmp(b, "gopher", len) == 0)
+ return PROTO_GOPHER;
+
+ if (strncasecmp(b, "wais", len) == 0)
+ return PROTO_WAIS;
+
+ if (strncasecmp(b, "cache_object", len) == 0)
+ return PROTO_CACHEOBJ;
+
+ if (strncasecmp(b, "urn", len) == 0)
+ return PROTO_URN;
+
+ if (strncasecmp(b, "whois", len) == 0)
+ return PROTO_WHOIS;
+
+ if (strncasecmp(b, "internal", len) == 0)
+ return PROTO_INTERNAL;
+
return PROTO_NONE;
}
-
/*
 * Return the well-known default TCP port for a protocol,
 * or 0 when none applies.
 */
int
urlDefaultPort(protocol_t p)
{
    switch (p) {

    case PROTO_HTTP:
        return 80;

    case PROTO_HTTPS:
        return 443;

    case PROTO_FTP:
        return 21;

    case PROTO_GOPHER:
        return 70;

    case PROTO_WAIS:
        return 210;

    case PROTO_CACHEOBJ:

    case PROTO_INTERNAL:
        return CACHE_HTTP_PORT;

    case PROTO_WHOIS:
        return 43;

    default:
        return 0;
    }
}
-request_t *
-urlParse(method_t method, char *url)
+/*
+ * Parse a URI/URL.
+ *
+ * If the 'request' arg is non-NULL, put parsed values there instead
+ * of allocating a new HttpRequest.
+ *
+ * This abuses HttpRequest as a way of representing the parsed url
+ * and its components.
+ * method is used to switch parsers and to init the HttpRequest.
+ * If method is METHOD_CONNECT, then rather than a URL a hostname:port is
+ * looked for.
+ * The url is non const so that if its too long we can NULL-terminate it in place.
+ */
+
+/*
+ * This routine parses a URL. Its assumed that the URL is complete -
+ * ie, the end of the string is the end of the URL. Don't pass a partial
+ * URL here as this routine doesn't have any way of knowing whether
+ * its partial or not (ie, it handles the case of no trailing slash as
+ * being "end of host with implied path of /".
+ */
+HttpRequest *
+urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
{
LOCAL_ARRAY(char, proto, MAX_URL);
LOCAL_ARRAY(char, login, MAX_URL);
LOCAL_ARRAY(char, host, MAX_URL);
LOCAL_ARRAY(char, urlpath, MAX_URL);
- request_t *request = NULL;
char *t = NULL;
+ char *q = NULL;
int port;
protocol_t protocol = PROTO_NONE;
int l;
+ int i;
+ const char *src;
+ char *dst;
proto[0] = host[0] = urlpath[0] = login[0] = '\0';
if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
- /* terminate so it doesn't overflow other buffers */
- *(url + (MAX_URL >> 1)) = '\0';
- debug(23, 1) ("urlParse: URL too large (%d bytes)\n", l);
- return NULL;
+ /* terminate so it doesn't overflow other buffers */
+ *(url + (MAX_URL >> 1)) = '\0';
+ debugs(23, 1, "urlParse: URL too large (" << l << " bytes)");
+ return NULL;
}
if (method == METHOD_CONNECT) {
- port = CONNECT_PORT;
- if (sscanf(url, "%[^:]:%d", host, &port) < 1)
- return NULL;
+ port = CONNECT_PORT;
+
+ if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
+ if (sscanf(url, "%[^:]:%d", host, &port) < 1)
+ return NULL;
+
} else if (!strncmp(url, "urn:", 4)) {
- return urnParse(method, url);
+ return urnParse(method, url);
} else {
- if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
- return NULL;
- protocol = urlParseProtocol(proto);
- port = urlDefaultPort(protocol);
- /* Is there any login informaiton? */
- if ((t = strrchr(host, '@'))) {
- strcpy(login, host);
- t = strrchr(login, '@');
- *t = 0;
- strcpy(host, t + 1);
- }
- if ((t = strrchr(host, ':'))) {
- *t++ = '\0';
- if (*t != '\0')
- port = atoi(t);
- }
+ /* Parse the URL: */
+ src = url;
+ i = 0;
+ /* Find first : - everything before is protocol */
+ for (i = 0, dst = proto; i < l && *src != ':'; i++, src++, dst++) {
+ *dst = *src;
+ }
+ if (i >= l)
+ return NULL;
+ *dst = '\0';
+
+ /* Then its :// */
+ /* (XXX yah, I'm not checking we've got enough data left before checking the array..) */
+ if (*src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
+ return NULL;
+ i += 3;
+ src += 3;
+
+ /* Then everything until first /; thats host (and port; which we'll look for here later) */
+ /* bug 1881: If we don't get a "/" then we imply it was there */
+ for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
+ *dst = *src;
+ }
+
+ /*
+ * We can't check for "i >= l" here because we could be at the end of the line
+ * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
+ * been -given- a valid URL and the path is just '/'.
+ */
+ if (i > l)
+ return NULL;
+ *dst = '\0';
+
+ /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
+ for (dst = urlpath; i < l && *src != '\r' && *src != '\n' && *src != '\0'; i++, src++, dst++) {
+ *dst = *src;
+ }
+
+ /* We -could- be at the end of the buffer here */
+ if (i > l)
+ return NULL;
+ /* If the URL path is empty we set it to be "/" */
+ if (dst == urlpath) {
+ *(dst++) = '/';
+ }
+ *dst = '\0';
+
+ protocol = urlParseProtocol(proto);
+ port = urlDefaultPort(protocol);
+
+ /* Is there any login information? (we should eventually parse it above) */
+ if ((t = strrchr(host, '@'))) {
+ strcpy((char *) login, (char *) host);
+ t = strrchr(login, '@');
+ *t = 0;
+ strcpy((char *) host, t + 1);
+ }
+
+ /* Is there any host information? (we should eventually parse it above) */
+ if (*host == '[') {
+ /* strip any IPA brackets. valid under IPv6. */
+ dst = host;
+#if USE_IPV6
+ /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
+ src = host;
+ src++;
+ l = strlen(host);
+ i = 1;
+ for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
+ *dst = *src;
+ }
+
+ /* we moved in-place, so truncate the actual hostname found */
+ *(dst++) = '\0';
+#else
+ /* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
+ while (*dst != '\0' && *dst != ']') dst++;
+#endif
+
+ /* skip ahead to either start of port, or original EOS */
+ while (*dst != '\0' && *dst != ':') dst++;
+ t = dst;
+ } else {
+ t = strrchr(host, ':');
+
+ if (t != strchr(host,':') ) {
+ /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
+ /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
+ /* therefore we MUST accept the case where they are not bracketed at all. */
+ t = NULL;
+ }
+ }
+
+ if (t && *t == ':') {
+ *t = '\0';
+ t++;
+ port = atoi(t);
+ }
}
+
for (t = host; *t; t++)
- *t = xtolower(*t);
- if (strspn(host, valid_hostname_chars) != strlen(host)) {
- debug(23, 1) ("urlParse: Illegal character in hostname '%s'\n", host);
- return NULL;
+ *t = xtolower(*t);
+
+ if (stringHasWhitespace(host)) {
+ if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
+ t = q = host;
+ while (*t) {
+ if (!xisspace(*t))
+ *q++ = *t;
+ t++;
+ }
+ *q = '\0';
+ }
}
+
+ debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+
+ if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
+ debugs(23, 1, "urlParse: Illegal character in hostname '" << host << "'");
+ return NULL;
+ }
+
+ if (Config.appendDomain && !strchr(host, '.'))
+ strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
+
/* remove trailing dots from hostnames */
while ((l = strlen(host)) > 0 && host[--l] == '.')
- host[l] = '\0';
- if (Config.appendDomain && !strchr(host, '.'))
- strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN);
- if (port == 0) {
- debug(23, 3) ("urlParse: Invalid port == 0\n");
- return NULL;
+ host[l] = '\0';
+
+ /* reject duplicate or leading dots */
+ if (strstr(host, "..") || *host == '.') {
+ debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
+ return NULL;
}
+
+ if (port < 1 || port > 65535) {
+ debugs(23, 3, "urlParse: Invalid port '" << port << "'");
+ return NULL;
+ }
+
#ifdef HARDCODE_DENY_PORTS
/* These ports are filtered in the default squid.conf, but
* maybe someone wants them hardcoded... */
- if (port == 7 || port == 9 || port = 19) {
- debug(23, 0) ("urlParse: Deny access to port %d\n", port);
- return NULL;
+ if (port == 7 || port == 9 || port == 19) {
+ debugs(23, 0, "urlParse: Deny access to port " << port);
+ return NULL;
}
#endif
+
if (stringHasWhitespace(urlpath)) {
- debug(23, 2) ("urlParse: URI has whitespace: {%s}\n", url);
- switch (Config.uri_whitespace) {
- case URI_WHITESPACE_DENY:
- return NULL;
- case URI_WHITESPACE_ALLOW:
- break;
- case URI_WHITESPACE_ENCODE:
- t = rfc1738_escape(urlpath);
- xstrncpy(urlpath, t, MAX_URL);
- break;
- case URI_WHITESPACE_CHOP:
- *(urlpath + strcspn(urlpath, w_space)) = '\0';
- break;
- }
+ debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+
+ switch (Config.uri_whitespace) {
+
+ case URI_WHITESPACE_DENY:
+ return NULL;
+
+ case URI_WHITESPACE_ALLOW:
+ break;
+
+ case URI_WHITESPACE_ENCODE:
+ t = rfc1738_escape_unescaped(urlpath);
+ xstrncpy(urlpath, t, MAX_URL);
+ break;
+
+ case URI_WHITESPACE_CHOP:
+ *(urlpath + strcspn(urlpath, w_space)) = '\0';
+ break;
+
+ case URI_WHITESPACE_STRIP:
+ default:
+ t = q = urlpath;
+ while (*t) {
+ if (!xisspace(*t))
+ *q++ = *t;
+ t++;
+ }
+ *q = '\0';
+ }
+ }
+
+ if (NULL == request)
+ request = new HttpRequest(method, protocol, urlpath);
+ else {
+ request->initHTTP(method, protocol, urlpath);
}
- request = requestCreate(method, protocol, urlpath);
- xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
+
+ request->SetHost(host);
xstrncpy(request->login, login, MAX_LOGIN_SZ);
request->port = (u_short) port;
return request;
}
-static request_t *
-urnParse(method_t method, char *urn)
+static HttpRequest *
+urnParse(const HttpRequestMethod& method, char *urn)
{
- debug(50, 5) ("urnParse: %s\n", urn);
- return requestCreate(method, PROTO_URN, urn + 4);
+ debugs(50, 5, "urnParse: " << urn);
+ return new HttpRequest(method, PROTO_URN, urn + 4);
}
const char *
-urlCanonical(request_t * request)
+urlCanonical(HttpRequest * request)
{
LOCAL_ARRAY(char, portbuf, 32);
+/// \todo AYJ: Performance: making this a ptr and allocating when needed will be better than a write and future xstrdup().
LOCAL_ARRAY(char, urlbuf, MAX_URL);
+
if (request->canonical)
- return request->canonical;
+ return request->canonical;
+
if (request->protocol == PROTO_URN) {
- snprintf(urlbuf, MAX_URL, "urn:%s", strBuf(request->urlpath));
+ snprintf(urlbuf, MAX_URL, "urn:%s", request->urlpath.buf());
} else {
- switch (request->method) {
- case METHOD_CONNECT:
- snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
- break;
- default:
- portbuf[0] = '\0';
- if (request->port != urlDefaultPort(request->protocol))
- snprintf(portbuf, 32, ":%d", request->port);
- snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
- ProtocolStr[request->protocol],
- request->login,
- *request->login ? "@" : null_string,
- request->host,
- portbuf,
- strBuf(request->urlpath));
- break;
- }
+/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
+ switch (request->method.id()) {
+
+ case METHOD_CONNECT:
+ snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
+ break;
+
+ default:
+ portbuf[0] = '\0';
+
+ if (request->port != urlDefaultPort(request->protocol))
+ snprintf(portbuf, 32, ":%d", request->port);
+
+ snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
+ ProtocolStr[request->protocol],
+ request->login,
+ *request->login ? "@" : null_string,
+ request->GetHost(),
+ portbuf,
+ request->urlpath.buf());
+
+ break;
+ }
}
+
return (request->canonical = xstrdup(urlbuf));
}
+/** \todo AYJ: Performance: This is an *almost* duplicate of urlCanoncical. But elides the query-string.
+ * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
+ * and never copy the query-string part in the first place
+ */
char *
-urlCanonicalClean(const request_t * request)
+urlCanonicalClean(const HttpRequest * request)
{
LOCAL_ARRAY(char, buf, MAX_URL);
LOCAL_ARRAY(char, portbuf, 32);
LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
char *t;
+
if (request->protocol == PROTO_URN) {
- snprintf(buf, MAX_URL, "urn:%s", strBuf(request->urlpath));
+ snprintf(buf, MAX_URL, "urn:%s", request->urlpath.buf());
} else {
- switch (request->method) {
- case METHOD_CONNECT:
- snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
- break;
- default:
- portbuf[0] = '\0';
- if (request->port != urlDefaultPort(request->protocol))
- snprintf(portbuf, 32, ":%d", request->port);
- loginbuf[0] = '\0';
- if ((int) strlen(request->login) > 0) {
- strcpy(loginbuf, request->login);
- if ((t = strchr(loginbuf, ':')))
- *t = '\0';
- strcat(loginbuf, "@");
- }
- snprintf(buf, MAX_URL, "%s://%s%s%s%s",
- ProtocolStr[request->protocol],
- loginbuf,
- request->host,
- portbuf,
- strBuf(request->urlpath));
- /*
- * strip arguments AFTER a question-mark
- */
- if (Config.onoff.strip_query_terms)
- if ((t = strchr(buf, '?')))
- *(++t) = '\0';
- break;
- }
+/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
+ switch (request->method.id()) {
+
+ case METHOD_CONNECT:
+ snprintf(buf, MAX_URL, "%s:%d",
+ request->GetHost(),
+ request->port);
+ break;
+
+ default:
+ portbuf[0] = '\0';
+
+ if (request->port != urlDefaultPort(request->protocol))
+ snprintf(portbuf, 32, ":%d", request->port);
+
+ loginbuf[0] = '\0';
+
+ if ((int) strlen(request->login) > 0) {
+ strcpy(loginbuf, request->login);
+
+ if ((t = strchr(loginbuf, ':')))
+ *t = '\0';
+
+ strcat(loginbuf, "@");
+ }
+
+ snprintf(buf, MAX_URL, "%s://%s%s%s%s",
+ ProtocolStr[request->protocol],
+ loginbuf,
+ request->GetHost(),
+ portbuf,
+ request->urlpath.buf());
+ /*
+ * strip arguments AFTER a question-mark
+ */
+
+ if (Config.onoff.strip_query_terms)
+ if ((t = strchr(buf, '?')))
+ *(++t) = '\0';
+
+ break;
+ }
}
- if (stringHasWhitespace(buf))
- xstrncpy(buf, rfc1738_escape(buf), MAX_URL);
+
+ if (stringHasCntl(buf))
+ xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
+
return buf;
}
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'.
 *
 * NULL and empty strings are treated as NOT relative (nothing to resolve).
 */
bool
urlIsRelative(const char *url)
{
    const char *p;

    if (url == NULL) {
        return (false);
    }
    if (*url == '\0') {
        return (false);
    }

    /* scan to the first ':' or '/', whichever comes first */
    for (p = url; *p != '\0' && *p != ':' && *p != '/'; p++);

    if (*p == ':') {
        /* a scheme delimiter precedes any slash: absolute URL */
        return (false);
    }
    return (true);
}
+
+/*
+ * Convert a relative URL to an absolute URL using the context of a given
+ * request.
+ *
+ * It is assumed that you have already ensured that the URL is relative.
+ *
+ * If NULL is returned it is an indication that the method in use in the
+ * request does not distinguish between relative and absolute and you should
+ * use the url unchanged.
+ *
+ * If non-NULL is returned, it is up to the caller to free the resulting
+ * memory using safe_free().
+ */
+char *
+urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
+{
+
+ if (req->method.id() == METHOD_CONNECT) {
+ return (NULL);
+ }
+
+ char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
+
+ if (req->protocol == PROTO_URN) {
+ snprintf(urlbuf, MAX_URL, "urn:%s", req->urlpath.buf());
+ return (urlbuf);
+ }
+
+ size_t urllen;
+
+ if (req->port != urlDefaultPort(req->protocol)) {
+ urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
+ ProtocolStr[req->protocol],
+ req->login,
+ *req->login ? "@" : null_string,
+ req->GetHost(),
+ req->port
+ );
+ } else {
+ urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
+ ProtocolStr[req->protocol],
+ req->login,
+ *req->login ? "@" : null_string,
+ req->GetHost()
+ );
+ }
+
+ if (relUrl[0] == '/') {
+ strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ } else {
+ const char *path = req->urlpath.buf();
+ const char *last_slash = strrchr(path, '/');
+
+ if (last_slash == NULL) {
+ urlbuf[urllen++] = '/';
+ strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ } else {
+ last_slash++;
+ size_t pathlen = last_slash - path;
+ if (pathlen > MAX_URL - urllen - 1) {
+ pathlen = MAX_URL - urllen - 1;
+ }
+ strncpy(&urlbuf[urllen], path, pathlen);
+ urllen += pathlen;
+ if (urllen + 1 < MAX_URL) {
+ strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
+ }
+ }
+ }
+
+ return (urlbuf);
+}
+
+/*
+ * matchDomainName() compares a hostname with a domainname according
+ * to the following rules:
+ *
+ * HOST DOMAIN MATCH?
+ * ------------- ------------- ------
+ * foo.com foo.com YES
+ * .foo.com foo.com YES
+ * x.foo.com foo.com NO
+ * foo.com .foo.com YES
+ * .foo.com .foo.com YES
+ * x.foo.com .foo.com YES
+ *
+ * We strip leading dots on hosts (but not domains!) so that
+ * ".foo.com" is is always the same as "foo.com".
+ *
+ * Return values:
+ * 0 means the host matches the domain
+ * 1 means the host is greater than the domain
+ * -1 means the host is less than the domain
+ */
+
int
-matchDomainName(const char *domain, const char *host)
+matchDomainName(const char *h, const char *d)
{
- int offset;
- if ((offset = strlen(host) - strlen(domain)) < 0)
- return 0; /* host too short */
- if (strcasecmp(domain, host + offset) != 0)
- return 0; /* no match at all */
- if (*domain == '.')
- return 1;
- if (offset == 0)
- return 1;
- if (*(host + offset - 1) == '.')
- return 1;
- return 0;
+ int dl;
+ int hl;
+
+ while ('.' == *h)
+ h++;
+
+ hl = strlen(h);
+
+ dl = strlen(d);
+
+ /*
+ * Start at the ends of the two strings and work towards the
+ * beginning.
+ */
+ while (xtolower(h[--hl]) == xtolower(d[--dl])) {
+ if (hl == 0 && dl == 0) {
+ /*
+ * We made it all the way to the beginning of both
+ * strings without finding any difference.
+ */
+ return 0;
+ }
+
+ if (0 == hl) {
+ /*
+ * The host string is shorter than the domain string.
+ * There is only one case when this can be a match.
+ * If the domain is just one character longer, and if
+ * that character is a leading '.' then we call it a
+ * match.
+ */
+
+ if (1 == dl && '.' == d[0])
+ return 0;
+ else
+ return -1;
+ }
+
+ if (0 == dl) {
+ /*
+ * The domain string is shorter than the host string.
+ * This is a match only if the first domain character
+ * is a leading '.'.
+ */
+
+ if ('.' == d[0])
+ return 0;
+ else
+ return 1;
+ }
+ }
+
+ /*
+ * We found different characters in the same position (from the end).
+ */
+ /*
+ * If one of those character is '.' then its special. In order
+ * for splay tree sorting to work properly, "x-foo.com" must
+ * be greater than ".foo.com" even though '-' is less than '.'.
+ */
+ if ('.' == d[dl])
+ return 1;
+
+ if ('.' == h[hl])
+ return -1;
+
+ return (xtolower(h[hl]) - xtolower(d[dl]));
}
+
+/*
+ * return true if we can serve requests for this method.
+ */
int
-urlCheckRequest(const request_t * r)
+urlCheckRequest(const HttpRequest * r)
{
int rc = 0;
- /* protocol "independent" methods */
+ /* protocol "independent" methods
+ *
+ * actually these methods are specific to HTTP:
+ * they are methods we recieve on our HTTP port,
+ * and if we had a FTP listener would not be relevant
+ * there.
+ *
+ * So, we should delegate them to HTTP. The problem is that we
+ * do not have a default protocol from the client side of HTTP.
+ */
+
if (r->method == METHOD_CONNECT)
- return 1;
+ return 1;
+
if (r->method == METHOD_TRACE)
- return 1;
+ return 1;
+
if (r->method == METHOD_PURGE)
- return 1;
+ return 1;
+
/* does method match the protocol? */
switch (r->protocol) {
+
case PROTO_URN:
+
case PROTO_HTTP:
- case PROTO_HTTPS:
+
case PROTO_CACHEOBJ:
- rc = 1;
- break;
+ rc = 1;
+ break;
+
case PROTO_FTP:
- if (r->method == METHOD_PUT)
- rc = 1;
+
+ if (r->method == METHOD_PUT)
+ rc = 1;
+
case PROTO_GOPHER:
+
case PROTO_WAIS:
+
case PROTO_WHOIS:
- if (r->method == METHOD_GET)
- rc = 1;
- else if (r->method == METHOD_HEAD)
- rc = 1;
- break;
+ if (r->method == METHOD_GET)
+ rc = 1;
+ else if (r->method == METHOD_HEAD)
+ rc = 1;
+
+ break;
+
+ case PROTO_HTTPS:
+#ifdef USE_SSL
+
+ rc = 1;
+
+ break;
+
+#else
+ /*
+ * Squid can't originate an SSL connection, so it should
+ * never receive an "https:" URL. It should always be
+ * CONNECT instead.
+ */
+ rc = 0;
+
+#endif
+
default:
- break;
+ break;
}
+
return rc;
}
/*
 * Extract the hostname from a URL:
 * Look for an ending '/' or ':' and terminate
 * Look for login info preceeded by '@'
 */
+
+class URLHostName
+{
+
+public:
+ char * extract(char const *url);
+
+private:
+ static char Host [SQUIDHOSTNAMELEN];
+ void init(char const *);
+ void findHostStart();
+ void trimTrailingChars();
+ void trimAuth();
+ char const *hostStart;
+ char const *url;
+};
+
char *
urlHostname(const char *url)
{
- LOCAL_ARRAY(char, host, SQUIDHOSTNAMELEN);
+ return URLHostName().extract(url);
+}
+
+char URLHostName::Host[SQUIDHOSTNAMELEN];
+
+void
+URLHostName::init(char const *aUrl)
+{
+ Host[0] = '\0';
+ url = aUrl;
+}
+
+void
+URLHostName::findHostStart()
+{
+ if (NULL == (hostStart = strchr(url, ':')))
+ return;
+
+ ++hostStart;
+
+ while (*hostStart != '\0' && *hostStart == '/')
+ ++hostStart;
+
+#if USE_IPV6
+ if (*hostStart == ']')
+ ++hostStart;
+#endif
+
+}
+
+void
+URLHostName::trimTrailingChars()
+{
+ char *t;
+
+ if ((t = strchr(Host, '/')))
+ *t = '\0';
+
+ if ((t = strrchr(Host, ':')))
+ *t = '\0';
+
+#if USE_IPV6
+ if ((t = strchr(Host, ']')))
+ *t = '\0';
+#endif
+
+}
+
+void
+URLHostName::trimAuth()
+{
char *t;
- host[0] = '\0';
- if (NULL == (t = strchr(url, ':')))
- return NULL;
- t++;
- while (*t != '\0' && *t == '/')
- t++;
- xstrncpy(host, t, SQUIDHOSTNAMELEN);
- if ((t = strchr(host, '/')))
- *t = '\0';
- if ((t = strchr(host, ':')))
- *t = '\0';
- if ((t = strrchr(host, '@'))) {
- t++;
- xmemmove(host, t, strlen(t) + 1);
+
+ if ((t = strrchr(Host, '@'))) {
+ t++;
+ xmemmove(Host, t, strlen(t) + 1);
}
- return host;
}
+
+char *
+URLHostName::extract(char const *aUrl)
+{
+ init(aUrl);
+ findHostStart();
+
+ if (hostStart == NULL)
+ return NULL;
+
+ xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
+
+ trimTrailingChars();
+
+ trimAuth();
+
+ return Host;
+}
+
+URL::URL() : scheme()
+{}
+
+URL::URL(URLScheme const &aScheme): scheme(aScheme)
+{}