/*
- * $Id: url.cc,v 1.165 2008/02/03 10:00:30 amosjeffries Exp $
+ * $Id$
*
* DEBUG: section 23 URL Parsing
* AUTHOR: Duane Wessels
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
* If the 'request' arg is non-NULL, put parsed values there instead
* of allocating a new HttpRequest.
*
- * This abuses HttpRequest as a way of representing the parsed url
+ * This abuses HttpRequest as a way of representing the parsed url
* and its components.
* method is used to switch parsers and to init the HttpRequest.
* If method is METHOD_CONNECT, then rather than a URL a hostname:port is
if (method == METHOD_CONNECT) {
port = CONNECT_PORT;
- if (sscanf(url, "[%[^:]]:%d", host, &port) < 1)
+ if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
if (sscanf(url, "%[^:]:%d", host, &port) < 1)
return NULL;
*dst = *src;
}
if (i >= l)
- return NULL;
+ return NULL;
*dst = '\0';
/* Then its :// */
/* Then everything until first /; thats host (and port; which we'll look for here later) */
/* bug 1881: If we don't get a "/" then we imply it was there */
- for (dst = host; i < l && *src != '/' && src != '\0'; i++, src++, dst++) {
+ for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
*dst = *src;
}
- /*
+ /*
* We can't check for "i >= l" here because we could be at the end of the line
* and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
* been -given- a valid URL and the path is just '/'.
}
/* Is there any host information? (we should eventually parse it above) */
- if(*host == '[') {
+ if (*host == '[') {
/* strip any IPA brackets. valid under IPv6. */
dst = host;
#if USE_IPV6
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
- src = host; src++;
+ src = host;
+ src++;
l = strlen(host);
i = 1;
for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
*(dst++) = '\0';
#else
/* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
- while(*dst != '\0' && *dst != ']') dst++;
+ while (*dst != '\0' && *dst != ']') dst++;
#endif
/* skip ahead to either start of port, or original EOS */
- while(*dst != '\0' && *dst != ':') dst++;
+ while (*dst != '\0' && *dst != ':') dst++;
t = dst;
} else {
t = strrchr(host, ':');
- if(t != strchr(host,':') ) {
+ if (t != strchr(host,':') ) {
/* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
/* therefore we MUST accept the case where they are not bracketed at all. */
}
if (t && *t == ':') {
- *t = '\0'; t++;
+ *t = '\0';
+ t++;
port = atoi(t);
}
}
/* reject duplicate or leading dots */
if (strstr(host, "..") || *host == '.') {
- debug(23, 1) ("urlParse: Illegal hostname '%s'\n", host);
+ debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
return NULL;
}
switch (request->method.id()) {
case METHOD_CONNECT:
- snprintf(buf, MAX_URL, "%s:%d",
+ snprintf(buf, MAX_URL, "%s:%d",
request->GetHost(),
request->port);
break;
return buf;
}
+/*
+ * Report whether a URL is relative, i.e. has no leading "scheme:" part.
+ *
+ * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
+ * appear before a ':'. The URL is therefore scanned up to the first ':' or
+ * '/' (or the end of the string); only a ':' found first makes it absolute.
+ *
+ * A NULL or empty URL is never reported as relative.
+ */
+bool
+urlIsRelative(const char *url)
+{
+    if (url == NULL || *url == '\0')
+        return false;
+
+    /* advance to the first delimiter, or to the terminating NUL */
+    const char *cursor = url;
+
+    while (*cursor != '\0' && *cursor != ':' && *cursor != '/')
+        cursor++;
+
+    /* stopping on ':' means a scheme precedes any '/', so the URL is absolute */
+    return *cursor != ':';
+}
+
+/*
+ * Convert a relative URL to an absolute URL using the context of a given
+ * request.
+ *
+ * It is assumed that you have already ensured that the URL is relative.
+ *
+ * req    - request supplying the protocol, login, host, port and path that
+ *          the relative URL is resolved against.
+ * relUrl - the relative URL. If it starts with '/' it replaces the whole
+ *          request path, otherwise it replaces everything after the last
+ *          '/' of the request path.
+ *
+ * If NULL is returned it is an indication that the method in use in the
+ * request does not distinguish between relative and absolute and you should
+ * use the url unchanged.
+ *
+ * If non-NULL is returned, it is up to the caller to free the resulting
+ * memory using safe_free(). The result is always NUL-terminated and is
+ * silently truncated to fit in MAX_URL bytes (terminator included).
+ */
+char *
+urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
+{
+
+    if (req->method.id() == METHOD_CONNECT) {
+        return (NULL);
+    }
+
+    char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
+
+    if (req->protocol == PROTO_URN) {
+        snprintf(urlbuf, MAX_URL, "urn:%s", req->urlpath.buf());
+        return (urlbuf);
+    }
+
+    /* index of the last byte of the buffer, reserved for the terminator */
+    const size_t limit = MAX_URL - 1;
+
+    int written;
+
+    if (req->port != urlDefaultPort(req->protocol)) {
+        written = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
+                           ProtocolStr[req->protocol],
+                           req->login,
+                           *req->login ? "@" : null_string,
+                           req->GetHost(),
+                           req->port
+                          );
+    } else {
+        written = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
+                           ProtocolStr[req->protocol],
+                           req->login,
+                           *req->login ? "@" : null_string,
+                           req->GetHost()
+                          );
+    }
+
+    /*
+     * snprintf() returns the length it wanted to produce, which can exceed
+     * what was actually stored (truncation), or a negative value on error.
+     * Clamp it so that urllen is always a valid offset into urlbuf;
+     * otherwise the size computations below would wrap around and the
+     * copies would run past the end of the buffer.
+     */
+    size_t urllen = 0;
+
+    if (written > 0) {
+        urllen = static_cast<size_t>(written);
+
+        if (urllen > limit) {
+            urllen = limit;
+        }
+    }
+
+    if (relUrl[0] == '/') {
+        /* host-relative: the relative URL replaces the whole path */
+        strncpy(&urlbuf[urllen], relUrl, limit - urllen);
+    } else {
+        const char *path = req->urlpath.buf();
+        const char *last_slash = strrchr(path, '/');
+
+        if (last_slash == NULL) {
+            /* request path has no directory part: resolve against "/" */
+            if (urllen < limit) {
+                urlbuf[urllen++] = '/';
+            }
+        } else {
+            /* keep the request path up to and including its last '/' */
+            size_t pathlen = last_slash + 1 - path;
+
+            if (pathlen > limit - urllen) {
+                pathlen = limit - urllen;
+            }
+            strncpy(&urlbuf[urllen], path, pathlen);
+            urllen += pathlen;
+        }
+
+        strncpy(&urlbuf[urllen], relUrl, limit - urllen);
+    }
+
+    /*
+     * strncpy() does not terminate the destination when the source fills
+     * the space it was given, so terminate explicitly. When everything fit,
+     * the terminator written by strncpy() comes earlier and this is a no-op
+     * as far as the resulting string is concerned.
+     */
+    urlbuf[limit] = '\0';
+
+    return (urlbuf);
+}
+
/*
* matchDomainName() compares a hostname with a domainname according
* to the following rules:
- *
+ *
* HOST DOMAIN MATCH?
* ------------- ------------- ------
* foo.com foo.com YES
URLHostName::init(char const *aUrl)
{
Host[0] = '\0';
- url = url;
+ url = aUrl;
}
void