Cleanup: zap CVS Id tags

[thirdparty/squid.git] / src / url.cc
diff --git a/src/url.cc b/src/url.cc

index 869ec4559b234b515a3e805b26e79c61ba3dea65..e78e4baf7fed383318bccaab5ff5baeaac166dfa 100644 (file)
--- a/src/url.cc
+++ b/src/url.cc
@@ -1,6 +1,6 @@
  
  /*
- * $Id: url.cc,v 1.165 2008/02/03 10:00:30 amosjeffries Exp $
+ * $Id$
   *
   * DEBUG: section 23    URL Parsing
   * AUTHOR: Duane Wessels
@@ -21,12 +21,12 @@
   *  it under the terms of the GNU General Public License as published by
   *  the Free Software Foundation; either version 2 of the License, or
   *  (at your option) any later version.
- *  
+ *
   *  This program is distributed in the hope that it will be useful,
   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *  GNU General Public License for more details.
- *  
+ *
   *  You should have received a copy of the GNU General Public License
   *  along with this program; if not, write to the Free Software
   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
@@ -180,7 +180,7 @@ urlDefaultPort(protocol_t p)
   * If the 'request' arg is non-NULL, put parsed values there instead
   * of allocating a new HttpRequest.
   *
- * This abuses HttpRequest as a way of representing the parsed url 
+ * This abuses HttpRequest as a way of representing the parsed url
   * and its components.
   * method is used to switch parsers and to init the HttpRequest.
   * If method is METHOD_CONNECT, then rather than a URL a hostname:port is
@@ -236,7 +236,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
              *dst = *src;
          }
          if (i >= l)
-           return NULL;
+            return NULL;
          *dst = '\0';
  
          /* Then its :// */
@@ -248,11 +248,11 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
  
          /* Then everything until first /; thats host (and port; which we'll look for here later) */
          /* bug 1881: If we don't get a "/" then we imply it was there */
-        for (dst = host; i < l && *src != '/' && src != '\0'; i++, src++, dst++) {
+        for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
              *dst = *src;
          }
  
-        /* 
+        /*
           * We can't check for "i >= l" here because we could be at the end of the line
           * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
           * been -given- a valid URL and the path is just '/'.
@@ -287,12 +287,13 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          }
  
          /* Is there any host information? (we should eventually parse it above) */
-        if(*host == '[') {
+        if (*host == '[') {
              /* strip any IPA brackets. valid under IPv6. */
              dst = host;
  #if USE_IPV6
              /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
-            src = host; src++;
+            src = host;
+            src++;
              l = strlen(host);
              i = 1;
              for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
@@ -303,16 +304,16 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
              *(dst++) = '\0';
  #else
              /* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
-            while(*dst != '\0' && *dst != ']') dst++;
+            while (*dst != '\0' && *dst != ']') dst++;
  #endif
  
              /* skip ahead to either start of port, or original EOS */
-            while(*dst != '\0' && *dst != ':') dst++;
+            while (*dst != '\0' && *dst != ':') dst++;
              t = dst;
          } else {
              t = strrchr(host, ':');
  
-            if(t != strchr(host,':') ) {
+            if (t != strchr(host,':') ) {
                  /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
                  /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
                  /* therefore we MUST accept the case where they are not bracketed at all. */
@@ -321,7 +322,8 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
          }
  
          if (t && *t == ':') {
-            *t = '\0'; t++;
+            *t = '\0';
+            t++;
              port = atoi(t);
          }
      }
@@ -357,7 +359,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
  
      /* reject duplicate or leading dots */
      if (strstr(host, "..") || *host == '.') {
-        debug(23, 1) ("urlParse: Illegal hostname '%s'\n", host);
+        debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
          return NULL;
      }
  
@@ -486,7 +488,7 @@ urlCanonicalClean(const HttpRequest * request)
          switch (request->method.id()) {
  
          case METHOD_CONNECT:
-            snprintf(buf, MAX_URL, "%s:%d", 
+            snprintf(buf, MAX_URL, "%s:%d",
                       request->GetHost(),
                       request->port);
              break;
@@ -532,10 +534,109 @@ urlCanonicalClean(const HttpRequest * request)
      return buf;
  }
  
+/*
+ * Decide whether 'url' is a relative reference. NULL or "" is not relative.
+ *
+ * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
+ * appear before a ':', so the first ':' or '/' found settles the answer.
+ */
+bool
+urlIsRelative(const char *url)
+{
+    /* Nothing to classify: a missing or empty URL is reported as absolute. */
+    if (url == NULL || *url == '\0')
+        return false;
+
+    /* Advance to the first ':' or '/', or to the terminator if neither. */
+    const char *cursor = url;
+
+    while (*cursor != '\0') {
+        if (*cursor == ':' || *cursor == '/')
+            break;
+
+        ++cursor;
+    }
+    /* Reaching '/' or the end of the string first means relative. */
+    return *cursor != ':';
+}
+
+/*
+ * Convert a relative URL to an absolute URL using the context of a given
+ * request.
+ *
+ * It is assumed that you have already ensured that the URL is relative.
+ *
+ * If NULL is returned it is an indication that the method in use in the
+ * request does not distinguish between relative and absolute and you should
+ * use the url unchanged.
+ *
+ * If non-NULL is returned, it is up to the caller to free the resulting
+ * memory using safe_free(). The result is always NUL-terminated; output
+ * that does not fit in MAX_URL bytes is truncated.
+ */
+char *
+urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
+{
+    if (req->method.id() == METHOD_CONNECT) {
+        return (NULL);
+    }
+
+    const size_t bufsize = MAX_URL;
+    char *urlbuf = (char *)xmalloc(bufsize * sizeof(char));
+
+    /* URNs are rebuilt from the request path alone; relUrl is not used. */
+    if (req->protocol == PROTO_URN) {
+        snprintf(urlbuf, bufsize, "urn:%s", req->urlpath.buf());
+        return (urlbuf);
+    }
+
+    /* Prefix is "scheme://[login@]host", plus ":port" when not the default. */
+    int written;
+    if (req->port != urlDefaultPort(req->protocol)) {
+        written = snprintf(urlbuf, bufsize, "%s://%s%s%s:%d",
+                           ProtocolStr[req->protocol],
+                           req->login,
+                           *req->login ? "@" : null_string,
+                           req->GetHost(),
+                           req->port);
+    } else {
+        written = snprintf(urlbuf, bufsize, "%s://%s%s%s",
+                           ProtocolStr[req->protocol],
+                           req->login,
+                           *req->login ? "@" : null_string,
+                           req->GetHost());
+    }
+
+    /* snprintf() returns the length it wanted (negative on error), not the
+     * length stored, so clamp it before using it as an offset. */
+    size_t urllen = 0;
+    if (written > 0)
+        urllen = ((size_t)written < bufsize) ? (size_t)written : bufsize - 1;
+    urlbuf[urllen] = '\0';
+
+    /* Append with snprintf(), which unlike strncpy() always terminates. */
+    char *tail = urlbuf + urllen;
+    const size_t room = bufsize - urllen;   /* >= 1, leaves space for NUL */
+    if (relUrl[0] == '/') {
+        snprintf(tail, room, "%s", relUrl);
+    } else {
+        const char *path = req->urlpath.buf();
+        const char *last_slash = strrchr(path, '/');
+        if (last_slash == NULL) {
+            snprintf(tail, room, "/%s", relUrl);
+        } else {
+            /* Keep the request path up to and including its last '/'. */
+            const int dirlen = (int)(last_slash - path) + 1;
+            snprintf(tail, room, "%.*s%s", dirlen, path, relUrl);
+        }
+    }
+    return (urlbuf);
+}
+
  /*
   * matchDomainName() compares a hostname with a domainname according
   * to the following rules:
- * 
+ *
   *    HOST          DOMAIN        MATCH?
   * ------------- -------------    ------
   *    foo.com       foo.com         YES
@@ -743,7 +844,7 @@ void
  URLHostName::init(char const *aUrl)
  {
      Host[0] = '\0';
-    url = url;
+    url = aUrl;
  }
  
  void