src/url.cc

   1
   2 /*
   3  * $Id: url.cc,v 1.165 2008/02/03 10:00:30 amosjeffries Exp $
   4  *
   5  * DEBUG: section 23    URL Parsing
   6  * AUTHOR: Duane Wessels
   7  *
   8  * SQUID Web Proxy Cache          http://www.squid-cache.org/
   9  * ----------------------------------------------------------
  10  *
  11  *  Squid is the result of efforts by numerous individuals from
  12  *  the Internet community; see the CONTRIBUTORS file for full
  13  *  details.   Many organizations have provided support for Squid's
  14  *  development; see the SPONSORS file for full details.  Squid is
  15  *  Copyrighted (C) 2001 by the Regents of the University of
  16  *  California; see the COPYRIGHT file for full details.  Squid
  17  *  incorporates software developed and/or copyrighted by other
  18  *  sources; see the CREDITS file for full details.
  19  *
  20  *  This program is free software; you can redistribute it and/or modify
  21  *  it under the terms of the GNU General Public License as published by
  22  *  the Free Software Foundation; either version 2 of the License, or
  23  *  (at your option) any later version.
  24  *
  25  *  This program is distributed in the hope that it will be useful,
  26  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  27  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  28  *  GNU General Public License for more details.
  29  *
  30  *  You should have received a copy of the GNU General Public License
  31  *  along with this program; if not, write to the Free Software
  32  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  33  *
  34  */
  35
  36 #include "URL.h"
  37 #include "HttpRequest.h"
  38 #include "URLScheme.h"
  39
  40 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn);
  41 static const char valid_hostname_chars_u[] =
  42     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  43     "abcdefghijklmnopqrstuvwxyz"
  44     "0123456789-._"
  45 #if USE_IPV6
  46     "[:]"
  47 #endif
  48     ;
  49 static const char valid_hostname_chars[] =
  50     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  51     "abcdefghijklmnopqrstuvwxyz"
  52     "0123456789-."
  53 #if USE_IPV6
  54     "[:]"
  55 #endif
  56     ;
  57
  58 void
  59 urlInitialize(void)
  60 {
  61     debugs(23, 5, "urlInitialize: Initializing...");
  62     /* this ensures that the number of protocol strings is the same as
  63      * the enum slots allocated because the last enum is always 'TOTAL'.
  64      */
  65     assert(strcmp(ProtocolStr[PROTO_MAX], "TOTAL") == 0);
  66     /*
  67      * These test that our matchDomainName() function works the
  68      * way we expect it to.
  69      */
  70     assert(0 == matchDomainName("foo.com", "foo.com"));
  71     assert(0 == matchDomainName(".foo.com", "foo.com"));
  72     assert(0 == matchDomainName("foo.com", ".foo.com"));
  73     assert(0 == matchDomainName(".foo.com", ".foo.com"));
  74     assert(0 == matchDomainName("x.foo.com", ".foo.com"));
  75     assert(0 != matchDomainName("x.foo.com", "foo.com"));
  76     assert(0 != matchDomainName("foo.com", "x.foo.com"));
  77     assert(0 != matchDomainName("bar.com", "foo.com"));
  78     assert(0 != matchDomainName(".bar.com", "foo.com"));
  79     assert(0 != matchDomainName(".bar.com", ".foo.com"));
  80     assert(0 != matchDomainName("bar.com", ".foo.com"));
  81     assert(0 < matchDomainName("zzz.com", "foo.com"));
  82     assert(0 > matchDomainName("aaa.com", "foo.com"));
  83     assert(0 == matchDomainName("FOO.com", "foo.COM"));
  84     assert(0 < matchDomainName("bfoo.com", "afoo.com"));
  85     assert(0 > matchDomainName("afoo.com", "bfoo.com"));
  86     assert(0 < matchDomainName("x-foo.com", ".foo.com"));
  87     /* more cases? */
  88 }
  89
  90 /**
  91  * urlParseProtocol() takes begin (b) and end (e) pointers, but for
  92  * backwards compatibility, e defaults to NULL, in which case we
  93  * assume b is NULL-terminated.
  94  */
  95 protocol_t
  96 urlParseProtocol(const char *b, const char *e)
  97 {
  98     /*
  99      * if e is NULL, b must be NULL terminated and we
 100      * make e point to the first whitespace character
 101      * after b.
 102      */
 103
 104     if (NULL == e)
 105         e = b + strcspn(b, ":");
 106
 107     int len = e - b;
 108
 109     /* test common stuff first */
 110
 111     if (strncasecmp(b, "http", len) == 0)
 112         return PROTO_HTTP;
 113
 114     if (strncasecmp(b, "ftp", len) == 0)
 115         return PROTO_FTP;
 116
 117     if (strncasecmp(b, "https", len) == 0)
 118         return PROTO_HTTPS;
 119
 120     if (strncasecmp(b, "file", len) == 0)
 121         return PROTO_FTP;
 122
 123     if (strncasecmp(b, "gopher", len) == 0)
 124         return PROTO_GOPHER;
 125
 126     if (strncasecmp(b, "wais", len) == 0)
 127         return PROTO_WAIS;
 128
 129     if (strncasecmp(b, "cache_object", len) == 0)
 130         return PROTO_CACHEOBJ;
 131
 132     if (strncasecmp(b, "urn", len) == 0)
 133         return PROTO_URN;
 134
 135     if (strncasecmp(b, "whois", len) == 0)
 136         return PROTO_WHOIS;
 137
 138     if (strncasecmp(b, "internal", len) == 0)
 139         return PROTO_INTERNAL;
 140
 141     return PROTO_NONE;
 142 }
 143
 144 int
 145 urlDefaultPort(protocol_t p)
 146 {
 147     switch (p) {
 148
 149     case PROTO_HTTP:
 150         return 80;
 151
 152     case PROTO_HTTPS:
 153         return 443;
 154
 155     case PROTO_FTP:
 156         return 21;
 157
 158     case PROTO_GOPHER:
 159         return 70;
 160
 161     case PROTO_WAIS:
 162         return 210;
 163
 164     case PROTO_CACHEOBJ:
 165
 166     case PROTO_INTERNAL:
 167         return CACHE_HTTP_PORT;
 168
 169     case PROTO_WHOIS:
 170         return 43;
 171
 172     default:
 173         return 0;
 174     }
 175 }
 176
 177 /*
 178  * Parse a URI/URL.
 179  *
 180  * If the 'request' arg is non-NULL, put parsed values there instead
 181  * of allocating a new HttpRequest.
 182  *
 183  * This abuses HttpRequest as a way of representing the parsed url
 184  * and its components.
 185  * method is used to switch parsers and to init the HttpRequest.
 186  * If method is METHOD_CONNECT, then rather than a URL a hostname:port is
 187  * looked for.
 188  * The url is non const so that if its too long we can NULL-terminate it in place.
 189  */
 190
 191 /*
 192  * This routine parses a URL. Its assumed that the URL is complete -
 193  * ie, the end of the string is the end of the URL. Don't pass a partial
 194  * URL here as this routine doesn't have any way of knowing whether
 195  * its partial or not (ie, it handles the case of no trailing slash as
 196  * being "end of host with implied path of /".
 197  */
 198 HttpRequest *
 199 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
 200 {
 201     LOCAL_ARRAY(char, proto, MAX_URL);
 202     LOCAL_ARRAY(char, login, MAX_URL);
 203     LOCAL_ARRAY(char, host, MAX_URL);
 204     LOCAL_ARRAY(char, urlpath, MAX_URL);
 205     char *t = NULL;
 206     char *q = NULL;
 207     int port;
 208     protocol_t protocol = PROTO_NONE;
 209     int l;
 210     int i;
 211     const char *src;
 212     char *dst;
 213     proto[0] = host[0] = urlpath[0] = login[0] = '\0';
 214
 215     if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
 216         /* terminate so it doesn't overflow other buffers */
 217         *(url + (MAX_URL >> 1)) = '\0';
 218         debugs(23, 1, "urlParse: URL too large (" << l << " bytes)");
 219         return NULL;
 220     }
 221     if (method == METHOD_CONNECT) {
 222         port = CONNECT_PORT;
 223
 224         if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
 225             if (sscanf(url, "%[^:]:%d", host, &port) < 1)
 226                 return NULL;
 227
 228     } else if (!strncmp(url, "urn:", 4)) {
 229         return urnParse(method, url);
 230     } else {
 231         /* Parse the URL: */
 232         src = url;
 233         i = 0;
 234         /* Find first : - everything before is protocol */
 235         for (i = 0, dst = proto; i < l && *src != ':'; i++, src++, dst++) {
 236             *dst = *src;
 237         }
 238         if (i >= l)
 239             return NULL;
 240         *dst = '\0';
 241
 242         /* Then its :// */
 243         /* (XXX yah, I'm not checking we've got enough data left before checking the array..) */
 244         if (*src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
 245             return NULL;
 246         i += 3;
 247         src += 3;
 248
 249         /* Then everything until first /; thats host (and port; which we'll look for here later) */
 250         /* bug 1881: If we don't get a "/" then we imply it was there */
 251         for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
 252             *dst = *src;
 253         }
 254
 255         /*
 256          * We can't check for "i >= l" here because we could be at the end of the line
 257          * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
 258          * been -given- a valid URL and the path is just '/'.
 259          */
 260         if (i > l)
 261             return NULL;
 262         *dst = '\0';
 263
 264         /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
 265         for (dst = urlpath; i < l && *src != '\r' && *src != '\n' && *src != '\0'; i++, src++, dst++) {
 266             *dst = *src;
 267         }
 268
 269         /* We -could- be at the end of the buffer here */
 270         if (i > l)
 271             return NULL;
 272         /* If the URL path is empty we set it to be "/" */
 273         if (dst == urlpath) {
 274             *(dst++) = '/';
 275         }
 276         *dst = '\0';
 277
 278         protocol = urlParseProtocol(proto);
 279         port = urlDefaultPort(protocol);
 280
 281         /* Is there any login information? (we should eventually parse it above) */
 282         if ((t = strrchr(host, '@'))) {
 283             strcpy((char *) login, (char *) host);
 284             t = strrchr(login, '@');
 285             *t = 0;
 286             strcpy((char *) host, t + 1);
 287         }
 288
 289         /* Is there any host information? (we should eventually parse it above) */
 290         if (*host == '[') {
 291             /* strip any IPA brackets. valid under IPv6. */
 292             dst = host;
 293 #if USE_IPV6
 294             /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
 295             src = host;
 296             src++;
 297             l = strlen(host);
 298             i = 1;
 299             for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
 300                 *dst = *src;
 301             }
 302
 303             /* we moved in-place, so truncate the actual hostname found */
 304             *(dst++) = '\0';
 305 #else
 306             /* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
 307             while (*dst != '\0' && *dst != ']') dst++;
 308 #endif
 309
 310             /* skip ahead to either start of port, or original EOS */
 311             while (*dst != '\0' && *dst != ':') dst++;
 312             t = dst;
 313         } else {
 314             t = strrchr(host, ':');
 315
 316             if (t != strchr(host,':') ) {
 317                 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
 318                 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
 319                 /* therefore we MUST accept the case where they are not bracketed at all. */
 320                 t = NULL;
 321             }
 322         }
 323
 324         if (t && *t == ':') {
 325             *t = '\0';
 326             t++;
 327             port = atoi(t);
 328         }
 329     }
 330
 331     for (t = host; *t; t++)
 332         *t = xtolower(*t);
 333
 334     if (stringHasWhitespace(host)) {
 335         if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
 336             t = q = host;
 337             while (*t) {
 338                 if (!xisspace(*t))
 339                     *q++ = *t;
 340                 t++;
 341             }
 342             *q = '\0';
 343         }
 344     }
 345
 346     debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
 347
 348     if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
 349         debugs(23, 1, "urlParse: Illegal character in hostname '" << host << "'");
 350         return NULL;
 351     }
 352
 353     if (Config.appendDomain && !strchr(host, '.'))
 354         strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
 355
 356     /* remove trailing dots from hostnames */
 357     while ((l = strlen(host)) > 0 && host[--l] == '.')
 358         host[l] = '\0';
 359
 360     /* reject duplicate or leading dots */
 361     if (strstr(host, "..") || *host == '.') {
 362         debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
 363         return NULL;
 364     }
 365
 366     if (port < 1 || port > 65535) {
 367         debugs(23, 3, "urlParse: Invalid port '" << port << "'");
 368         return NULL;
 369     }
 370
 371 #ifdef HARDCODE_DENY_PORTS
 372     /* These ports are filtered in the default squid.conf, but
 373      * maybe someone wants them hardcoded... */
 374     if (port == 7 || port == 9 || port == 19) {
 375         debugs(23, 0, "urlParse: Deny access to port " << port);
 376         return NULL;
 377     }
 378 #endif
 379
 380     if (stringHasWhitespace(urlpath)) {
 381         debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
 382
 383         switch (Config.uri_whitespace) {
 384
 385         case URI_WHITESPACE_DENY:
 386             return NULL;
 387
 388         case URI_WHITESPACE_ALLOW:
 389             break;
 390
 391         case URI_WHITESPACE_ENCODE:
 392             t = rfc1738_escape_unescaped(urlpath);
 393             xstrncpy(urlpath, t, MAX_URL);
 394             break;
 395
 396         case URI_WHITESPACE_CHOP:
 397             *(urlpath + strcspn(urlpath, w_space)) = '\0';
 398             break;
 399
 400         case URI_WHITESPACE_STRIP:
 401         default:
 402             t = q = urlpath;
 403             while (*t) {
 404                 if (!xisspace(*t))
 405                     *q++ = *t;
 406                 t++;
 407             }
 408             *q = '\0';
 409         }
 410     }
 411
 412     if (NULL == request)
 413         request = new HttpRequest(method, protocol, urlpath);
 414     else {
 415         request->initHTTP(method, protocol, urlpath);
 416     }
 417
 418     request->SetHost(host);
 419     xstrncpy(request->login, login, MAX_LOGIN_SZ);
 420     request->port = (u_short) port;
 421     return request;
 422 }
 423
 424 static HttpRequest *
 425 urnParse(const HttpRequestMethod& method, char *urn)
 426 {
 427     debugs(50, 5, "urnParse: " << urn);
 428     return new HttpRequest(method, PROTO_URN, urn + 4);
 429 }
 430
 431 const char *
 432 urlCanonical(HttpRequest * request)
 433 {
 434     LOCAL_ARRAY(char, portbuf, 32);
 435 /// \todo AYJ: Performance: making this a ptr and allocating when needed will be better than a write and future xstrdup().
 436     LOCAL_ARRAY(char, urlbuf, MAX_URL);
 437
 438     if (request->canonical)
 439         return request->canonical;
 440
 441     if (request->protocol == PROTO_URN) {
 442         snprintf(urlbuf, MAX_URL, "urn:%s", request->urlpath.buf());
 443     } else {
 444 /// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
 445         switch (request->method.id()) {
 446
 447         case METHOD_CONNECT:
 448             snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
 449             break;
 450
 451         default:
 452             portbuf[0] = '\0';
 453
 454             if (request->port != urlDefaultPort(request->protocol))
 455                 snprintf(portbuf, 32, ":%d", request->port);
 456
 457             snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
 458                      ProtocolStr[request->protocol],
 459                      request->login,
 460                      *request->login ? "@" : null_string,
 461                      request->GetHost(),
 462                      portbuf,
 463                      request->urlpath.buf());
 464
 465             break;
 466         }
 467     }
 468
 469     return (request->canonical = xstrdup(urlbuf));
 470 }
 471
 472 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanoncical. But elides the query-string.
 473  *        After copying it on in the first place! Would be less code to merge the two with a flag parameter.
 474  *        and never copy the query-string part in the first place
 475  */
 476 char *
 477 urlCanonicalClean(const HttpRequest * request)
 478 {
 479     LOCAL_ARRAY(char, buf, MAX_URL);
 480     LOCAL_ARRAY(char, portbuf, 32);
 481     LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
 482     char *t;
 483
 484     if (request->protocol == PROTO_URN) {
 485         snprintf(buf, MAX_URL, "urn:%s", request->urlpath.buf());
 486     } else {
 487 /// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
 488         switch (request->method.id()) {
 489
 490         case METHOD_CONNECT:
 491             snprintf(buf, MAX_URL, "%s:%d",
 492                      request->GetHost(),
 493                      request->port);
 494             break;
 495
 496         default:
 497             portbuf[0] = '\0';
 498
 499             if (request->port != urlDefaultPort(request->protocol))
 500                 snprintf(portbuf, 32, ":%d", request->port);
 501
 502             loginbuf[0] = '\0';
 503
 504             if ((int) strlen(request->login) > 0) {
 505                 strcpy(loginbuf, request->login);
 506
 507                 if ((t = strchr(loginbuf, ':')))
 508                     *t = '\0';
 509
 510                 strcat(loginbuf, "@");
 511             }
 512
 513             snprintf(buf, MAX_URL, "%s://%s%s%s%s",
 514                      ProtocolStr[request->protocol],
 515                      loginbuf,
 516                      request->GetHost(),
 517                      portbuf,
 518                      request->urlpath.buf());
 519             /*
 520              * strip arguments AFTER a question-mark
 521              */
 522
 523             if (Config.onoff.strip_query_terms)
 524                 if ((t = strchr(buf, '?')))
 525                     *(++t) = '\0';
 526
 527             break;
 528         }
 529     }
 530
 531     if (stringHasCntl(buf))
 532         xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
 533
 534     return buf;
 535 }
 536
 537 /*
 538  * Test if a URL is relative.
 539  *
 540  * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 541  * appear before a ':'.
 542  */
 543 bool
 544 urlIsRelative(const char *url)
 545 {
 546     const char *p;
 547
 548     if (url == NULL) {
 549         return (false);
 550     }
 551     if (*url == '\0') {
 552         return (false);
 553     }
 554
 555     for (p = url; *p != '\0' && *p != ':' && *p != '/'; p++);
 556
 557     if (*p == ':') {
 558         return (false);
 559     }
 560     return (true);
 561 }
 562
 563 /*
 564  * Convert a relative URL to an absolute URL using the context of a given
 565  * request.
 566  *
 567  * It is assumed that you have already ensured that the URL is relative.
 568  *
 569  * If NULL is returned it is an indication that the method in use in the
 570  * request does not distinguish between relative and absolute and you should
 571  * use the url unchanged.
 572  *
 573  * If non-NULL is returned, it is up to the caller to free the resulting
 574  * memory using safe_free().
 575  */
 576 char *
 577 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
 578 {
 579
 580     if (req->method.id() == METHOD_CONNECT) {
 581         return (NULL);
 582     }
 583
 584     char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
 585
 586     if (req->protocol == PROTO_URN) {
 587         snprintf(urlbuf, MAX_URL, "urn:%s", req->urlpath.buf());
 588         return (urlbuf);
 589     }
 590
 591     size_t urllen;
 592
 593     if (req->port != urlDefaultPort(req->protocol)) {
 594         urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
 595                           ProtocolStr[req->protocol],
 596                           req->login,
 597                           *req->login ? "@" : null_string,
 598                           req->GetHost(),
 599                           req->port
 600                          );
 601     } else {
 602         urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
 603                           ProtocolStr[req->protocol],
 604                           req->login,
 605                           *req->login ? "@" : null_string,
 606                           req->GetHost()
 607                          );
 608     }
 609
 610     if (relUrl[0] == '/') {
 611         strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 612     } else {
 613         const char *path = req->urlpath.buf();
 614         const char *last_slash = strrchr(path, '/');
 615
 616         if (last_slash == NULL) {
 617             urlbuf[urllen++] = '/';
 618             strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 619         } else {
 620             last_slash++;
 621             size_t pathlen = last_slash - path;
 622             if (pathlen > MAX_URL - urllen - 1) {
 623                 pathlen = MAX_URL - urllen - 1;
 624             }
 625             strncpy(&urlbuf[urllen], path, pathlen);
 626             urllen += pathlen;
 627             if (urllen + 1 < MAX_URL) {
 628                 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 629             }
 630         }
 631     }
 632
 633     return (urlbuf);
 634 }
 635
 636 /*
 637  * matchDomainName() compares a hostname with a domainname according
 638  * to the following rules:
 639  *
 640  *    HOST          DOMAIN        MATCH?
 641  * ------------- -------------    ------
 642  *    foo.com       foo.com         YES
 643  *   .foo.com       foo.com         YES
 644  *  x.foo.com       foo.com          NO
 645  *    foo.com      .foo.com         YES
 646  *   .foo.com      .foo.com         YES
 647  *  x.foo.com      .foo.com         YES
 648  *
 649  *  We strip leading dots on hosts (but not domains!) so that
 650  *  ".foo.com" is is always the same as "foo.com".
 651  *
 652  *  Return values:
 653  *     0 means the host matches the domain
 654  *     1 means the host is greater than the domain
 655  *    -1 means the host is less than the domain
 656  */
 657
 658 int
 659 matchDomainName(const char *h, const char *d)
 660 {
 661     int dl;
 662     int hl;
 663
 664     while ('.' == *h)
 665         h++;
 666
 667     hl = strlen(h);
 668
 669     dl = strlen(d);
 670
 671     /*
 672      * Start at the ends of the two strings and work towards the
 673      * beginning.
 674      */
 675     while (xtolower(h[--hl]) == xtolower(d[--dl])) {
 676         if (hl == 0 && dl == 0) {
 677             /*
 678              * We made it all the way to the beginning of both
 679              * strings without finding any difference.
 680              */
 681             return 0;
 682         }
 683
 684         if (0 == hl) {
 685             /*
 686              * The host string is shorter than the domain string.
 687              * There is only one case when this can be a match.
 688              * If the domain is just one character longer, and if
 689              * that character is a leading '.' then we call it a
 690              * match.
 691              */
 692
 693             if (1 == dl && '.' == d[0])
 694                 return 0;
 695             else
 696                 return -1;
 697         }
 698
 699         if (0 == dl) {
 700             /*
 701              * The domain string is shorter than the host string.
 702              * This is a match only if the first domain character
 703              * is a leading '.'.
 704              */
 705
 706             if ('.' == d[0])
 707                 return 0;
 708             else
 709                 return 1;
 710         }
 711     }
 712
 713     /*
 714      * We found different characters in the same position (from the end).
 715      */
 716     /*
 717      * If one of those character is '.' then its special.  In order
 718      * for splay tree sorting to work properly, "x-foo.com" must
 719      * be greater than ".foo.com" even though '-' is less than '.'.
 720      */
 721     if ('.' == d[dl])
 722         return 1;
 723
 724     if ('.' == h[hl])
 725         return -1;
 726
 727     return (xtolower(h[hl]) - xtolower(d[dl]));
 728 }
 729
 730
 731 /*
 732  * return true if we can serve requests for this method.
 733  */
 734 int
 735 urlCheckRequest(const HttpRequest * r)
 736 {
 737     int rc = 0;
 738     /* protocol "independent" methods
 739      *
 740      * actually these methods are specific to HTTP:
 741      * they are methods we recieve on our HTTP port,
 742      * and if we had a FTP listener would not be relevant
 743      * there.
 744      *
 745      * So, we should delegate them to HTTP. The problem is that we
 746      * do not have a default protocol from the client side of HTTP.
 747      */
 748
 749     if (r->method == METHOD_CONNECT)
 750         return 1;
 751
 752     if (r->method == METHOD_TRACE)
 753         return 1;
 754
 755     if (r->method == METHOD_PURGE)
 756         return 1;
 757
 758     /* does method match the protocol? */
 759     switch (r->protocol) {
 760
 761     case PROTO_URN:
 762
 763     case PROTO_HTTP:
 764
 765     case PROTO_CACHEOBJ:
 766         rc = 1;
 767         break;
 768
 769     case PROTO_FTP:
 770
 771         if (r->method == METHOD_PUT)
 772             rc = 1;
 773
 774     case PROTO_GOPHER:
 775
 776     case PROTO_WAIS:
 777
 778     case PROTO_WHOIS:
 779         if (r->method == METHOD_GET)
 780             rc = 1;
 781         else if (r->method == METHOD_HEAD)
 782             rc = 1;
 783
 784         break;
 785
 786     case PROTO_HTTPS:
 787 #ifdef USE_SSL
 788
 789         rc = 1;
 790
 791         break;
 792
 793 #else
 794         /*
 795         * Squid can't originate an SSL connection, so it should
 796         * never receive an "https:" URL.  It should always be
 797         * CONNECT instead.
 798         */
 799         rc = 0;
 800
 801 #endif
 802
 803     default:
 804         break;
 805     }
 806
 807     return rc;
 808 }
 809
 810 /*
 811  * Quick-n-dirty host extraction from a URL.  Steps:
 812  *      Look for a colon
 813  *      Skip any '/' after the colon
 814  *      Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
 815  *      Look for an ending '/' or ':' and terminate
 816  *      Look for login info preceeded by '@'
 817  */
 818
 819 class URLHostName
 820 {
 821
 822 public:
 823     char * extract(char const *url);
 824
 825 private:
 826     static char Host [SQUIDHOSTNAMELEN];
 827     void init(char const *);
 828     void findHostStart();
 829     void trimTrailingChars();
 830     void trimAuth();
 831     char const *hostStart;
 832     char const *url;
 833 };
 834
 835 char *
 836 urlHostname(const char *url)
 837 {
 838     return URLHostName().extract(url);
 839 }
 840
 841 char URLHostName::Host[SQUIDHOSTNAMELEN];
 842
 843 void
 844 URLHostName::init(char const *aUrl)
 845 {
 846     Host[0] = '\0';
 847     url = aUrl;
 848 }
 849
 850 void
 851 URLHostName::findHostStart()
 852 {
 853     if (NULL == (hostStart = strchr(url, ':')))
 854         return;
 855
 856     ++hostStart;
 857
 858     while (*hostStart != '\0' && *hostStart == '/')
 859         ++hostStart;
 860
 861 #if USE_IPV6
 862     if (*hostStart == ']')
 863         ++hostStart;
 864 #endif
 865
 866 }
 867
 868 void
 869 URLHostName::trimTrailingChars()
 870 {
 871     char *t;
 872
 873     if ((t = strchr(Host, '/')))
 874         *t = '\0';
 875
 876     if ((t = strrchr(Host, ':')))
 877         *t = '\0';
 878
 879 #if USE_IPV6
 880     if ((t = strchr(Host, ']')))
 881         *t = '\0';
 882 #endif
 883
 884 }
 885
 886 void
 887 URLHostName::trimAuth()
 888 {
 889     char *t;
 890
 891     if ((t = strrchr(Host, '@'))) {
 892         t++;
 893         xmemmove(Host, t, strlen(t) + 1);
 894     }
 895 }
 896
 897 char *
 898 URLHostName::extract(char const *aUrl)
 899 {
 900     init(aUrl);
 901     findHostStart();
 902
 903     if (hostStart == NULL)
 904         return NULL;
 905
 906     xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
 907
 908     trimTrailingChars();
 909
 910     trimAuth();
 911
 912     return Host;
 913 }
 914
 915 URL::URL() : scheme()
 916 {}
 917
 918 URL::URL(URLScheme const &aScheme): scheme(aScheme)
 919 {}