src/url.cc

   1 /*
   2  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 /* DEBUG: section 23    URL Parsing */
  10
  11 #include "squid.h"
  12 #include "globals.h"
  13 #include "HttpRequest.h"
  14 #include "rfc1738.h"
  15 #include "SquidConfig.h"
  16 #include "SquidString.h"
  17 #include "URL.h"
  18
/* Shared tail of urlParse(): stores the already-split URL components in
 * 'request', or in a newly allocated HttpRequest when 'request' is NULL. */
static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
                                   const AnyP::ProtocolType protocol,
                                   const char *const urlpath,
                                   const char *const host,
                                   const SBuf &login,
                                   const int port,
                                   HttpRequest *request);
/* Handles "urn:" URIs on behalf of urlParse(). */
static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/* Characters accepted in a hostname when Config.onoff.allow_underscore is set:
 * identical to valid_hostname_chars plus '_'. */
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-._"
    "[:]"
    ;
/* Characters accepted in a hostname when Config.onoff.check_hostnames is set;
 * "[:]" is included so that IPv6 literals pass the check. */
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
    "[:]"
    ;
  39
  40 const SBuf &
  41 URL::Asterisk()
  42 {
  43     static SBuf star("*");
  44     return star;
  45 }
  46
/**
 * Start-up sanity checks for the URL code: verifies the protocol name table
 * and exercises matchDomainName() with a fixed set of expectations.
 * Every check is an assert(), so a failure aborts the process.
 */
void
urlInitialize(void)
{
    debugs(23, 5, "urlInitialize: Initializing...");
    /* this ensures that the number of protocol strings is the same as
     * the enum slots allocated because the last enum is always 'MAX'.
     */
    assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
    /*
     * These test that our matchDomainName() function works the
     * way we expect it to.
     */
    /* exact matches; a leading dot on either side is tolerated */
    assert(0 == matchDomainName("foo.com", "foo.com"));
    assert(0 == matchDomainName(".foo.com", "foo.com"));
    assert(0 == matchDomainName("foo.com", ".foo.com"));
    assert(0 == matchDomainName(".foo.com", ".foo.com"));
    /* sub-domains match only a dot-prefixed domain */
    assert(0 == matchDomainName("x.foo.com", ".foo.com"));
    assert(0 != matchDomainName("x.foo.com", "foo.com"));
    assert(0 != matchDomainName("foo.com", "x.foo.com"));
    /* unrelated domains never match */
    assert(0 != matchDomainName("bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", ".foo.com"));
    assert(0 != matchDomainName("bar.com", ".foo.com"));
    /* the sign of the result gives a usable ordering */
    assert(0 < matchDomainName("zzz.com", "foo.com"));
    assert(0 > matchDomainName("aaa.com", "foo.com"));
    /* comparison is case-insensitive */
    assert(0 == matchDomainName("FOO.com", "foo.COM"));
    assert(0 < matchDomainName("bfoo.com", "afoo.com"));
    assert(0 > matchDomainName("afoo.com", "bfoo.com"));
    /* "x-foo.com" must sort above ".foo.com" even though '-' < '.' */
    assert(0 < matchDomainName("x-foo.com", ".foo.com"));
    /* more cases? */
}
  78
  79 /**
  80  * urlParseProtocol() takes begin (b) and end (e) pointers, but for
  81  * backwards compatibility, e defaults to NULL, in which case we
  82  * assume b is NULL-terminated.
  83  */
  84 AnyP::ProtocolType
  85 urlParseProtocol(const char *b, const char *e)
  86 {
  87     /*
  88      * if e is NULL, b must be NULL terminated and we
  89      * make e point to the first whitespace character
  90      * after b.
  91      */
  92
  93     if (NULL == e)
  94         e = b + strcspn(b, ":");
  95
  96     int len = e - b;
  97
  98     /* test common stuff first */
  99
 100     if (strncasecmp(b, "http", len) == 0)
 101         return AnyP::PROTO_HTTP;
 102
 103     if (strncasecmp(b, "ftp", len) == 0)
 104         return AnyP::PROTO_FTP;
 105
 106     if (strncasecmp(b, "https", len) == 0)
 107         return AnyP::PROTO_HTTPS;
 108
 109     if (strncasecmp(b, "file", len) == 0)
 110         return AnyP::PROTO_FTP;
 111
 112     if (strncasecmp(b, "coap", len) == 0)
 113         return AnyP::PROTO_COAP;
 114
 115     if (strncasecmp(b, "coaps", len) == 0)
 116         return AnyP::PROTO_COAPS;
 117
 118     if (strncasecmp(b, "gopher", len) == 0)
 119         return AnyP::PROTO_GOPHER;
 120
 121     if (strncasecmp(b, "wais", len) == 0)
 122         return AnyP::PROTO_WAIS;
 123
 124     if (strncasecmp(b, "cache_object", len) == 0)
 125         return AnyP::PROTO_CACHE_OBJECT;
 126
 127     if (strncasecmp(b, "urn", len) == 0)
 128         return AnyP::PROTO_URN;
 129
 130     if (strncasecmp(b, "whois", len) == 0)
 131         return AnyP::PROTO_WHOIS;
 132
 133     return AnyP::PROTO_NONE;
 134 }
 135
 136 int
 137 urlDefaultPort(AnyP::ProtocolType p)
 138 {
 139     switch (p) {
 140
 141     case AnyP::PROTO_HTTP:
 142         return 80;
 143
 144     case AnyP::PROTO_HTTPS:
 145         return 443;
 146
 147     case AnyP::PROTO_FTP:
 148         return 21;
 149
 150     case AnyP::PROTO_COAP:
 151     case AnyP::PROTO_COAPS:
 152         // coaps:// default is TBA as of draft-ietf-core-coap-08.
 153         // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
 154         return 5683;
 155
 156     case AnyP::PROTO_GOPHER:
 157         return 70;
 158
 159     case AnyP::PROTO_WAIS:
 160         return 210;
 161
 162     case AnyP::PROTO_CACHE_OBJECT:
 163         return CACHE_HTTP_PORT;
 164
 165     case AnyP::PROTO_WHOIS:
 166         return 43;
 167
 168     default:
 169         return 0;
 170     }
 171 }
 172
 173 /*
 174  * Parse a URI/URL.
 175  *
 176  * If the 'request' arg is non-NULL, put parsed values there instead
 177  * of allocating a new HttpRequest.
 178  *
 179  * This abuses HttpRequest as a way of representing the parsed url
 180  * and its components.
 181  * method is used to switch parsers and to init the HttpRequest.
 182  * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
 183  * looked for.
 184  * The url is non const so that if its too long we can NULL-terminate it in place.
 185  */
 186
 187 /*
 188  * This routine parses a URL. Its assumed that the URL is complete -
 189  * ie, the end of the string is the end of the URL. Don't pass a partial
 190  * URL here as this routine doesn't have any way of knowing whether
 191  * its partial or not (ie, it handles the case of no trailing slash as
 192  * being "end of host with implied path of /".
 193  */
 194 HttpRequest *
 195 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
 196 {
 197     LOCAL_ARRAY(char, proto, MAX_URL);
 198     LOCAL_ARRAY(char, login, MAX_URL);
 199     LOCAL_ARRAY(char, host, MAX_URL);
 200     LOCAL_ARRAY(char, urlpath, MAX_URL);
 201     char *t = NULL;
 202     char *q = NULL;
 203     int port;
 204     AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
 205     int l;
 206     int i;
 207     const char *src;
 208     char *dst;
 209     proto[0] = host[0] = urlpath[0] = login[0] = '\0';
 210
 211     if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
 212         /* terminate so it doesn't overflow other buffers */
 213         *(url + (MAX_URL >> 1)) = '\0';
 214         debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
 215         return NULL;
 216     }
 217     if (method == Http::METHOD_CONNECT) {
 218         port = CONNECT_PORT;
 219
 220         if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
 221             if (sscanf(url, "%[^:]:%d", host, &port) < 1)
 222                 return NULL;
 223
 224     } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
 225                URL::Asterisk().cmp(url) == 0) {
 226         protocol = AnyP::PROTO_HTTP;
 227         port = urlDefaultPort(protocol);
 228         return urlParseFinish(method, protocol, url, host, SBuf(), port, request);
 229     } else if (!strncmp(url, "urn:", 4)) {
 230         return urnParse(method, url, request);
 231     } else {
 232         /* Parse the URL: */
 233         src = url;
 234         i = 0;
 235         /* Find first : - everything before is protocol */
 236         for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
 237             *dst = *src;
 238         }
 239         if (i >= l)
 240             return NULL;
 241         *dst = '\0';
 242
 243         /* Then its :// */
 244         if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
 245             return NULL;
 246         i += 3;
 247         src += 3;
 248
 249         /* Then everything until first /; thats host (and port; which we'll look for here later) */
 250         // bug 1881: If we don't get a "/" then we imply it was there
 251         // bug 3074: We could just be given a "?" or "#". These also imply "/"
 252         // bug 3233: whitespace is also a hostname delimiter.
 253         for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
 254             *dst = *src;
 255         }
 256
 257         /*
 258          * We can't check for "i >= l" here because we could be at the end of the line
 259          * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
 260          * been -given- a valid URL and the path is just '/'.
 261          */
 262         if (i > l)
 263             return NULL;
 264         *dst = '\0';
 265
 266         // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
 267         if (*src == '?' || *src == '#' || *src == '\0') {
 268             urlpath[0] = '/';
 269             dst = &urlpath[1];
 270         } else {
 271             dst = urlpath;
 272         }
 273         /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
 274         for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
 275             *dst = *src;
 276         }
 277
 278         /* We -could- be at the end of the buffer here */
 279         if (i > l)
 280             return NULL;
 281         /* If the URL path is empty we set it to be "/" */
 282         if (dst == urlpath) {
 283             *dst = '/';
 284             ++dst;
 285         }
 286         *dst = '\0';
 287
 288         protocol = urlParseProtocol(proto);
 289         port = urlDefaultPort(protocol);
 290
 291         /* Is there any login information? (we should eventually parse it above) */
 292         t = strrchr(host, '@');
 293         if (t != NULL) {
 294             strncpy((char *) login, (char *) host, sizeof(login)-1);
 295             login[sizeof(login)-1] = '\0';
 296             t = strrchr(login, '@');
 297             *t = 0;
 298             strncpy((char *) host, t + 1, sizeof(host)-1);
 299             host[sizeof(host)-1] = '\0';
 300         }
 301
 302         /* Is there any host information? (we should eventually parse it above) */
 303         if (*host == '[') {
 304             /* strip any IPA brackets. valid under IPv6. */
 305             dst = host;
 306             /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
 307             src = host;
 308             ++src;
 309             l = strlen(host);
 310             i = 1;
 311             for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
 312                 *dst = *src;
 313             }
 314
 315             /* we moved in-place, so truncate the actual hostname found */
 316             *dst = '\0';
 317             ++dst;
 318
 319             /* skip ahead to either start of port, or original EOS */
 320             while (*dst != '\0' && *dst != ':')
 321                 ++dst;
 322             t = dst;
 323         } else {
 324             t = strrchr(host, ':');
 325
 326             if (t != strchr(host,':') ) {
 327                 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
 328                 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
 329                 /* therefore we MUST accept the case where they are not bracketed at all. */
 330                 t = NULL;
 331             }
 332         }
 333
 334         // Bug 3183 sanity check: If scheme is present, host must be too.
 335         if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
 336             debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
 337             return NULL;
 338         }
 339
 340         if (t && *t == ':') {
 341             *t = '\0';
 342             ++t;
 343             port = atoi(t);
 344         }
 345     }
 346
 347     for (t = host; *t; ++t)
 348         *t = xtolower(*t);
 349
 350     if (stringHasWhitespace(host)) {
 351         if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
 352             t = q = host;
 353             while (*t) {
 354                 if (!xisspace(*t)) {
 355                     *q = *t;
 356                     ++q;
 357                 }
 358                 ++t;
 359             }
 360             *q = '\0';
 361         }
 362     }
 363
 364     debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
 365
 366     if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
 367         debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
 368         return NULL;
 369     }
 370
 371     /* For IPV6 addresses also check for a colon */
 372     if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
 373         strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
 374
 375     /* remove trailing dots from hostnames */
 376     while ((l = strlen(host)) > 0 && host[--l] == '.')
 377         host[l] = '\0';
 378
 379     /* reject duplicate or leading dots */
 380     if (strstr(host, "..") || *host == '.') {
 381         debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
 382         return NULL;
 383     }
 384
 385     if (port < 1 || port > 65535) {
 386         debugs(23, 3, "urlParse: Invalid port '" << port << "'");
 387         return NULL;
 388     }
 389
 390 #if HARDCODE_DENY_PORTS
 391     /* These ports are filtered in the default squid.conf, but
 392      * maybe someone wants them hardcoded... */
 393     if (port == 7 || port == 9 || port == 19) {
 394         debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
 395         return NULL;
 396     }
 397 #endif
 398
 399     if (stringHasWhitespace(urlpath)) {
 400         debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
 401
 402         switch (Config.uri_whitespace) {
 403
 404         case URI_WHITESPACE_DENY:
 405             return NULL;
 406
 407         case URI_WHITESPACE_ALLOW:
 408             break;
 409
 410         case URI_WHITESPACE_ENCODE:
 411             t = rfc1738_escape_unescaped(urlpath);
 412             xstrncpy(urlpath, t, MAX_URL);
 413             break;
 414
 415         case URI_WHITESPACE_CHOP:
 416             *(urlpath + strcspn(urlpath, w_space)) = '\0';
 417             break;
 418
 419         case URI_WHITESPACE_STRIP:
 420         default:
 421             t = q = urlpath;
 422             while (*t) {
 423                 if (!xisspace(*t)) {
 424                     *q = *t;
 425                     ++q;
 426                 }
 427                 ++t;
 428             }
 429             *q = '\0';
 430         }
 431     }
 432
 433     return urlParseFinish(method, protocol, urlpath, host, SBuf(login), port, request);
 434 }
 435
 436 /**
 437  * Update request with parsed URI data.  If the request arg is
 438  * non-NULL, put parsed values there instead of allocating a new
 439  * HttpRequest.
 440  */
 441 static HttpRequest *
 442 urlParseFinish(const HttpRequestMethod& method,
 443                const AnyP::ProtocolType protocol,
 444                const char *const urlpath,
 445                const char *const host,
 446                const SBuf &login,
 447                const int port,
 448                HttpRequest *request)
 449 {
 450     if (NULL == request)
 451         request = new HttpRequest(method, protocol, urlpath);
 452     else {
 453         request->initHTTP(method, protocol, urlpath);
 454         safe_free(request->canonical);
 455     }
 456
 457     request->SetHost(host);
 458     request->url.userInfo(login);
 459     request->port = (unsigned short) port;
 460     return request;
 461 }
 462
 463 static HttpRequest *
 464 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
 465 {
 466     debugs(50, 5, "urnParse: " << urn);
 467     if (request) {
 468         request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
 469         safe_free(request->canonical);
 470         return request;
 471     }
 472
 473     return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
 474 }
 475
 476 const char *
 477 urlCanonical(HttpRequest * request)
 478 {
 479     LOCAL_ARRAY(char, portbuf, 32);
 480     LOCAL_ARRAY(char, urlbuf, MAX_URL);
 481
 482     if (request->canonical)
 483         return request->canonical;
 484
 485     if (request->url.getScheme() == AnyP::PROTO_URN) {
 486         snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
 487                  SQUIDSTRINGPRINT(request->urlpath));
 488     } else {
 489         switch (request->method.id()) {
 490
 491         case Http::METHOD_CONNECT:
 492             snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
 493             break;
 494
 495         default: {
 496             portbuf[0] = '\0';
 497
 498             if (request->port != urlDefaultPort(request->url.getScheme()))
 499                 snprintf(portbuf, 32, ":%d", request->port);
 500
 501             snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s%s" SQUIDSTRINGPH,
 502                      request->url.getScheme().c_str(),
 503                      SQUIDSBUFPRINT(request->url.userInfo()),
 504                      !request->url.userInfo().isEmpty() ? "@" : "",
 505                      request->GetHost(),
 506                      portbuf,
 507                      SQUIDSTRINGPRINT(request->urlpath));
 508         }
 509         }
 510     }
 511
 512     return (request->canonical = xstrdup(urlbuf));
 513 }
 514
 515 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
 516  *        After copying it on in the first place! Would be less code to merge the two with a flag parameter.
 517  *        and never copy the query-string part in the first place
 518  */
 519 char *
 520 urlCanonicalClean(const HttpRequest * request)
 521 {
 522     LOCAL_ARRAY(char, buf, MAX_URL);
 523     LOCAL_ARRAY(char, portbuf, 32);
 524     char *t;
 525
 526     if (request->url.getScheme() == AnyP::PROTO_URN) {
 527         snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
 528                  SQUIDSTRINGPRINT(request->urlpath));
 529     } else {
 530         switch (request->method.id()) {
 531
 532         case Http::METHOD_CONNECT:
 533             snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
 534             break;
 535
 536         default: {
 537             portbuf[0] = '\0';
 538
 539             if (request->port != urlDefaultPort(request->url.getScheme()))
 540                 snprintf(portbuf, 32, ":%d", request->port);
 541
 542             snprintf(buf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s%s" SQUIDSTRINGPH,
 543                      request->url.getScheme().c_str(),
 544                      SQUIDSBUFPRINT(request->url.userInfo()),
 545                      (request->url.userInfo().isEmpty() ? "" : "@"),
 546                      request->GetHost(),
 547                      portbuf,
 548                      SQUIDSTRINGPRINT(request->urlpath));
 549
 550             // strip arguments AFTER a question-mark
 551             if (Config.onoff.strip_query_terms)
 552                 if ((t = strchr(buf, '?')))
 553                     *(++t) = '\0';
 554         }
 555         } // switch
 556     }
 557
 558     if (stringHasCntl(buf))
 559         xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
 560
 561     return buf;
 562 }
 563
 564 /**
 565  * Yet another alternative to urlCanonical.
 566  * This one adds the https:// parts to Http::METHOD_CONNECT URL
 567  * for use in error page outputs.
 568  * Luckily we can leverage the others instead of duplicating.
 569  */
 570 const char *
 571 urlCanonicalFakeHttps(const HttpRequest * request)
 572 {
 573     LOCAL_ARRAY(char, buf, MAX_URL);
 574
 575     // method CONNECT and port HTTPS
 576     if (request->method == Http::METHOD_CONNECT && request->port == 443) {
 577         snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
 578         return buf;
 579     }
 580
 581     // else do the normal complete canonical thing.
 582     return urlCanonicalClean(request);
 583 }
 584
 585 /*
 586  * Test if a URL is relative.
 587  *
 588  * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 589  * appear before a ':'.
 590  */
 591 bool
 592 urlIsRelative(const char *url)
 593 {
 594     const char *p;
 595
 596     if (url == NULL) {
 597         return (false);
 598     }
 599     if (*url == '\0') {
 600         return (false);
 601     }
 602
 603     for (p = url; *p != '\0' && *p != ':' && *p != '/'; ++p);
 604
 605     if (*p == ':') {
 606         return (false);
 607     }
 608     return (true);
 609 }
 610
 611 /*
 612  * Convert a relative URL to an absolute URL using the context of a given
 613  * request.
 614  *
 615  * It is assumed that you have already ensured that the URL is relative.
 616  *
 617  * If NULL is returned it is an indication that the method in use in the
 618  * request does not distinguish between relative and absolute and you should
 619  * use the url unchanged.
 620  *
 621  * If non-NULL is returned, it is up to the caller to free the resulting
 622  * memory using safe_free().
 623  */
 624 char *
 625 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
 626 {
 627
 628     if (req->method.id() == Http::METHOD_CONNECT) {
 629         return (NULL);
 630     }
 631
 632     char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
 633
 634     if (req->url.getScheme() == AnyP::PROTO_URN) {
 635         snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
 636                  SQUIDSTRINGPRINT(req->urlpath));
 637         return (urlbuf);
 638     }
 639
 640     size_t urllen;
 641
 642     if (req->port != urlDefaultPort(req->url.getScheme())) {
 643         urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s:%d",
 644                           req->url.getScheme().c_str(),
 645                           SQUIDSBUFPRINT(req->url.userInfo()),
 646                           !req->url.userInfo().isEmpty() ? "@" : "",
 647                           req->GetHost(),
 648                           req->port
 649                          );
 650     } else {
 651         urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s",
 652                           req->url.getScheme().c_str(),
 653                           SQUIDSBUFPRINT(req->url.userInfo()),
 654                           !req->url.userInfo().isEmpty() ? "@" : "",
 655                           req->GetHost()
 656                          );
 657     }
 658
 659     if (relUrl[0] == '/') {
 660         strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 661     } else {
 662         const char *path = req->urlpath.termedBuf();
 663         const char *last_slash = strrchr(path, '/');
 664
 665         if (last_slash == NULL) {
 666             urlbuf[urllen] = '/';
 667             ++urllen;
 668             strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 669         } else {
 670             ++last_slash;
 671             size_t pathlen = last_slash - path;
 672             if (pathlen > MAX_URL - urllen - 1) {
 673                 pathlen = MAX_URL - urllen - 1;
 674             }
 675             strncpy(&urlbuf[urllen], path, pathlen);
 676             urllen += pathlen;
 677             if (urllen + 1 < MAX_URL) {
 678                 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 679             }
 680         }
 681     }
 682
 683     return (urlbuf);
 684 }
 685
 686 /*
 687  * matchDomainName() compares a hostname with a domainname according
 688  * to the following rules:
 689  *
 690  *    HOST          DOMAIN        MATCH?
 691  * ------------- -------------    ------
 692  *    foo.com       foo.com         YES
 693  *   .foo.com       foo.com         YES
 694  *  x.foo.com       foo.com          NO
 695  *    foo.com      .foo.com         YES
 696  *   .foo.com      .foo.com         YES
 697  *  x.foo.com      .foo.com         YES
 698  *
 699  *  We strip leading dots on hosts (but not domains!) so that
 700  *  ".foo.com" is is always the same as "foo.com".
 701  *
 702  *  Return values:
 703  *     0 means the host matches the domain
 704  *     1 means the host is greater than the domain
 705  *    -1 means the host is less than the domain
 706  */
 707
 708 int
 709 matchDomainName(const char *h, const char *d)
 710 {
 711     int dl;
 712     int hl;
 713
 714     while ('.' == *h)
 715         ++h;
 716
 717     hl = strlen(h);
 718
 719     dl = strlen(d);
 720
 721     /*
 722      * Start at the ends of the two strings and work towards the
 723      * beginning.
 724      */
 725     while (xtolower(h[--hl]) == xtolower(d[--dl])) {
 726         if (hl == 0 && dl == 0) {
 727             /*
 728              * We made it all the way to the beginning of both
 729              * strings without finding any difference.
 730              */
 731             return 0;
 732         }
 733
 734         if (0 == hl) {
 735             /*
 736              * The host string is shorter than the domain string.
 737              * There is only one case when this can be a match.
 738              * If the domain is just one character longer, and if
 739              * that character is a leading '.' then we call it a
 740              * match.
 741              */
 742
 743             if (1 == dl && '.' == d[0])
 744                 return 0;
 745             else
 746                 return -1;
 747         }
 748
 749         if (0 == dl) {
 750             /*
 751              * The domain string is shorter than the host string.
 752              * This is a match only if the first domain character
 753              * is a leading '.'.
 754              */
 755
 756             if ('.' == d[0])
 757                 return 0;
 758             else
 759                 return 1;
 760         }
 761     }
 762
 763     /*
 764      * We found different characters in the same position (from the end).
 765      */
 766     /*
 767      * If one of those character is '.' then its special.  In order
 768      * for splay tree sorting to work properly, "x-foo.com" must
 769      * be greater than ".foo.com" even though '-' is less than '.'.
 770      */
 771     if ('.' == d[dl])
 772         return 1;
 773
 774     if ('.' == h[hl])
 775         return -1;
 776
 777     return (xtolower(h[hl]) - xtolower(d[dl]));
 778 }
 779
 780 /*
 781  * return true if we can serve requests for this method.
 782  */
 783 int
 784 urlCheckRequest(const HttpRequest * r)
 785 {
 786     int rc = 0;
 787     /* protocol "independent" methods
 788      *
 789      * actually these methods are specific to HTTP:
 790      * they are methods we recieve on our HTTP port,
 791      * and if we had a FTP listener would not be relevant
 792      * there.
 793      *
 794      * So, we should delegate them to HTTP. The problem is that we
 795      * do not have a default protocol from the client side of HTTP.
 796      */
 797
 798     if (r->method == Http::METHOD_CONNECT)
 799         return 1;
 800
 801     // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
 802     // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
 803     if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
 804         return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || URL::Asterisk().cmp(r->urlpath.rawBuf(), r->urlpath.size()) != 0);
 805
 806     if (r->method == Http::METHOD_PURGE)
 807         return 1;
 808
 809     /* does method match the protocol? */
 810     switch (r->url.getScheme()) {
 811
 812     case AnyP::PROTO_URN:
 813
 814     case AnyP::PROTO_HTTP:
 815
 816     case AnyP::PROTO_CACHE_OBJECT:
 817         rc = 1;
 818         break;
 819
 820     case AnyP::PROTO_FTP:
 821
 822         if (r->method == Http::METHOD_PUT)
 823             rc = 1;
 824
 825     case AnyP::PROTO_GOPHER:
 826
 827     case AnyP::PROTO_WAIS:
 828
 829     case AnyP::PROTO_WHOIS:
 830         if (r->method == Http::METHOD_GET)
 831             rc = 1;
 832         else if (r->method == Http::METHOD_HEAD)
 833             rc = 1;
 834
 835         break;
 836
 837     case AnyP::PROTO_HTTPS:
 838 #if USE_OPENSSL
 839
 840         rc = 1;
 841
 842         break;
 843
 844 #else
 845         /*
 846         * Squid can't originate an SSL connection, so it should
 847         * never receive an "https:" URL.  It should always be
 848         * CONNECT instead.
 849         */
 850         rc = 0;
 851
 852 #endif
 853
 854     default:
 855         break;
 856     }
 857
 858     return rc;
 859 }
 860
 861 /*
 862  * Quick-n-dirty host extraction from a URL.  Steps:
 863  *      Look for a colon
 864  *      Skip any '/' after the colon
 865  *      Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
 866  *      Look for an ending '/' or ':' and terminate
 867  *      Look for login info preceeded by '@'
 868  */
 869
 870 class URLHostName
 871 {
 872
 873 public:
 874     char * extract(char const *url);
 875
 876 private:
 877     static char Host [SQUIDHOSTNAMELEN];
 878     void init(char const *);
 879     void findHostStart();
 880     void trimTrailingChars();
 881     void trimAuth();
 882     char const *hostStart;
 883     char const *url;
 884 };
 885
 886 char *
 887 urlHostname(const char *url)
 888 {
 889     return URLHostName().extract(url);
 890 }
 891
 892 char URLHostName::Host[SQUIDHOSTNAMELEN];
 893
 894 void
 895 URLHostName::init(char const *aUrl)
 896 {
 897     Host[0] = '\0';
 898     url = aUrl;
 899 }
 900
 901 void
 902 URLHostName::findHostStart()
 903 {
 904     if (NULL == (hostStart = strchr(url, ':')))
 905         return;
 906
 907     ++hostStart;
 908
 909     while (*hostStart != '\0' && *hostStart == '/')
 910         ++hostStart;
 911
 912     if (*hostStart == ']')
 913         ++hostStart;
 914 }
 915
 916 void
 917 URLHostName::trimTrailingChars()
 918 {
 919     char *t;
 920
 921     if ((t = strchr(Host, '/')))
 922         *t = '\0';
 923
 924     if ((t = strrchr(Host, ':')))
 925         *t = '\0';
 926
 927     if ((t = strchr(Host, ']')))
 928         *t = '\0';
 929 }
 930
 931 void
 932 URLHostName::trimAuth()
 933 {
 934     char *t;
 935
 936     if ((t = strrchr(Host, '@'))) {
 937         ++t;
 938         memmove(Host, t, strlen(t) + 1);
 939     }
 940 }
 941
 942 char *
 943 URLHostName::extract(char const *aUrl)
 944 {
 945     init(aUrl);
 946     findHostStart();
 947
 948     if (hostStart == NULL)
 949         return NULL;
 950
 951     xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
 952
 953     trimTrailingChars();
 954
 955     trimAuth();
 956
 957     return Host;
 958 }
 959