src/url.cc

   1 /*
   2  * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 /* DEBUG: section 23    URL Parsing */
  10
  11 #include "squid.h"
  12 #include "globals.h"
  13 #include "HttpRequest.h"
  14 #include "rfc1738.h"
  15 #include "SquidConfig.h"
  16 #include "SquidString.h"
  17 #include "URL.h"
  18
/* Forward declarations of file-local helpers defined further down. */

/* Fills in (or allocates) an HttpRequest from already-split URL components. */
static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
                                   const AnyP::ProtocolType protocol,
                                   const char *const urlpath,
                                   const char *const host,
                                   const char *const login,
                                   const int port,
                                   HttpRequest *request);
/* Builds a PROTO_URN request from a string that starts with "urn:". */
static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);

/*
 * Hostname alphabet used by urlParse() when Config.onoff.check_hostnames
 * is on and Config.onoff.allow_underscore is set: letters, digits,
 * '-', '.', '_' plus "[:]" (presumably for IPv6 literals).
 */
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-._"
    "[:]"
    ;
/*
 * Same alphabet as valid_hostname_chars_u but without the underscore;
 * used by urlParse() when Config.onoff.allow_underscore is off.
 */
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
    "[:]"
    ;
  39
/**
 * Start-up self-check for this module.
 *
 * Performs no initialization of state; it only logs a debug message and
 * asserts that the protocol name table and matchDomainName() behave as
 * the rest of the code expects. Any failed expectation aborts via assert().
 */
void
urlInitialize(void)
{
    debugs(23, 5, "urlInitialize: Initializing...");
    /* this ensures that the number of protocol strings is the same as
     * the enum slots allocated because the last enum is always 'MAX'.
     */
    assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
    /*
     * These test that our matchDomainName() function works the
     * way we expect it to.
     */
    /* exact matches, with and without leading dots on either side */
    assert(0 == matchDomainName("foo.com", "foo.com"));
    assert(0 == matchDomainName(".foo.com", "foo.com"));
    assert(0 == matchDomainName("foo.com", ".foo.com"));
    assert(0 == matchDomainName(".foo.com", ".foo.com"));
    /* a sub-domain matches only a dot-prefixed domain */
    assert(0 == matchDomainName("x.foo.com", ".foo.com"));
    assert(0 != matchDomainName("x.foo.com", "foo.com"));
    assert(0 != matchDomainName("foo.com", "x.foo.com"));
    /* unrelated names never match */
    assert(0 != matchDomainName("bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", "foo.com"));
    assert(0 != matchDomainName(".bar.com", ".foo.com"));
    assert(0 != matchDomainName("bar.com", ".foo.com"));
    /* ordering: the sign of the result must be usable for sorting */
    assert(0 < matchDomainName("zzz.com", "foo.com"));
    assert(0 > matchDomainName("aaa.com", "foo.com"));
    /* comparison is case-insensitive */
    assert(0 == matchDomainName("FOO.com", "foo.COM"));
    assert(0 < matchDomainName("bfoo.com", "afoo.com"));
    assert(0 > matchDomainName("afoo.com", "bfoo.com"));
    /* '.' in the domain sorts below any host character at that position */
    assert(0 < matchDomainName("x-foo.com", ".foo.com"));
    /* more cases? */
}
  71
  72 /**
  73  * urlParseProtocol() takes begin (b) and end (e) pointers, but for
  74  * backwards compatibility, e defaults to NULL, in which case we
  75  * assume b is NULL-terminated.
  76  */
  77 AnyP::ProtocolType
  78 urlParseProtocol(const char *b, const char *e)
  79 {
  80     /*
  81      * if e is NULL, b must be NULL terminated and we
  82      * make e point to the first whitespace character
  83      * after b.
  84      */
  85
  86     if (NULL == e)
  87         e = b + strcspn(b, ":");
  88
  89     int len = e - b;
  90
  91     /* test common stuff first */
  92
  93     if (strncasecmp(b, "http", len) == 0)
  94         return AnyP::PROTO_HTTP;
  95
  96     if (strncasecmp(b, "ftp", len) == 0)
  97         return AnyP::PROTO_FTP;
  98
  99     if (strncasecmp(b, "https", len) == 0)
 100         return AnyP::PROTO_HTTPS;
 101
 102     if (strncasecmp(b, "file", len) == 0)
 103         return AnyP::PROTO_FTP;
 104
 105     if (strncasecmp(b, "coap", len) == 0)
 106         return AnyP::PROTO_COAP;
 107
 108     if (strncasecmp(b, "coaps", len) == 0)
 109         return AnyP::PROTO_COAPS;
 110
 111     if (strncasecmp(b, "gopher", len) == 0)
 112         return AnyP::PROTO_GOPHER;
 113
 114     if (strncasecmp(b, "wais", len) == 0)
 115         return AnyP::PROTO_WAIS;
 116
 117     if (strncasecmp(b, "cache_object", len) == 0)
 118         return AnyP::PROTO_CACHE_OBJECT;
 119
 120     if (strncasecmp(b, "urn", len) == 0)
 121         return AnyP::PROTO_URN;
 122
 123     if (strncasecmp(b, "whois", len) == 0)
 124         return AnyP::PROTO_WHOIS;
 125
 126     return AnyP::PROTO_NONE;
 127 }
 128
 129 int
 130 urlDefaultPort(AnyP::ProtocolType p)
 131 {
 132     switch (p) {
 133
 134     case AnyP::PROTO_HTTP:
 135         return 80;
 136
 137     case AnyP::PROTO_HTTPS:
 138         return 443;
 139
 140     case AnyP::PROTO_FTP:
 141         return 21;
 142
 143     case AnyP::PROTO_COAP:
 144     case AnyP::PROTO_COAPS:
 145         // coaps:// default is TBA as of draft-ietf-core-coap-08.
 146         // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
 147         return 5683;
 148
 149     case AnyP::PROTO_GOPHER:
 150         return 70;
 151
 152     case AnyP::PROTO_WAIS:
 153         return 210;
 154
 155     case AnyP::PROTO_CACHE_OBJECT:
 156         return CACHE_HTTP_PORT;
 157
 158     case AnyP::PROTO_WHOIS:
 159         return 43;
 160
 161     default:
 162         return 0;
 163     }
 164 }
 165
/*
 * Parse a URI/URL.
 *
 * If the 'request' arg is non-NULL, put parsed values there instead
 * of allocating a new HttpRequest.
 *
 * This abuses HttpRequest as a way of representing the parsed url
 * and its components.
 * method is used to switch parsers and to init the HttpRequest.
 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
 * looked for.
 * The url is non-const so that if it is too long we can NUL-terminate it in place.
 *
 * It is assumed that the URL is complete - i.e. the end of the string is
 * the end of the URL. Don't pass a partial URL here as this routine
 * doesn't have any way of knowing whether it is partial or not (it
 * handles the case of no trailing slash as being "end of host with
 * implied path of /").
 *
 * Returns the filled-in request, or NULL when the URL is too long,
 * malformed, has an illegal hostname or port, or contains whitespace
 * that Config.uri_whitespace says to deny.
 */
HttpRequest *
urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
{
    /* scratch buffers for the URL components; each can hold a whole URL */
    LOCAL_ARRAY(char, proto, MAX_URL);
    LOCAL_ARRAY(char, login, MAX_URL);
    LOCAL_ARRAY(char, host, MAX_URL);
    LOCAL_ARRAY(char, urlpath, MAX_URL);
    char *t = NULL;
    char *q = NULL;
    int port;
    AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
    int l;          /* length of url (later reused for host length) */
    int i;          /* number of url bytes consumed so far */
    const char *src;
    char *dst;
    proto[0] = host[0] = urlpath[0] = login[0] = '\0';

    /* leave room for the domain that may be appended to the host below */
    if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
        /* terminate so it doesn't overflow other buffers */
        /* NOTE(review): this writes at offset MAX_URL/2 regardless of l;
         * that is only inside the string if l exceeds MAX_URL/2 - confirm
         * appendDomainLen can never be large enough to break that. */
        *(url + (MAX_URL >> 1)) = '\0';
        debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
        return NULL;
    }
    if (method == Http::METHOD_CONNECT) {
        /* CONNECT carries "host:port" or "[ipv6]:port", not a URL */
        port = CONNECT_PORT;

        /* try the bracketed form first, then the plain one; the port is
         * optional in both (a result of 1 means only the host matched) */
        if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
            if (sscanf(url, "%[^:]:%d", host, &port) < 1)
                return NULL;

    } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
               strcmp(url, "*") == 0) {
        /* "OPTIONS *" / "TRACE *": no host; the "*" itself becomes the path */
        protocol = AnyP::PROTO_HTTP;
        port = urlDefaultPort(protocol);
        return urlParseFinish(method, protocol, url, host, login, port, request);
    } else if (!strncmp(url, "urn:", 4)) {
        return urnParse(method, url, request);
    } else {
        /* Parse the URL: */
        src = url;
        i = 0;
        /* Find first : - everything before is protocol */
        for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
            *dst = *src;
        }
        /* no ':' anywhere means there is no scheme at all */
        if (i >= l)
            return NULL;
        *dst = '\0';

        /* Then it must be followed by "://" */
        if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
            return NULL;
        i += 3;
        src += 3;

        /* Then everything until first /; that is host (and login and port, which are split off later) */
        // bug 1881: If we don't get a "/" then we imply it was there
        // bug 3074: We could just be given a "?" or "#". These also imply "/"
        // bug 3233: whitespace is also a hostname delimiter.
        for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
            *dst = *src;
        }

        /*
         * We can't check for "i >= l" here because we could be at the end of the line
         * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
         * been -given- a valid URL and the path is just '/'.
         */
        if (i > l)
            return NULL;
        *dst = '\0';

        // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
        if (*src == '?' || *src == '#' || *src == '\0') {
            urlpath[0] = '/';
            dst = &urlpath[1];
        } else {
            dst = urlpath;
        }
        /* Then everything from / (inclusive) until \r\n or \0 - that is urlpath */
        for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
            *dst = *src;
        }

        /* We -could- be at the end of the buffer here */
        if (i > l)
            return NULL;
        /* If the URL path is empty we set it to be "/" */
        if (dst == urlpath) {
            *dst = '/';
            ++dst;
        }
        *dst = '\0';

        protocol = urlParseProtocol(proto);
        port = urlDefaultPort(protocol);

        /* Is there any login information? (we should eventually parse it above) */
        /* everything before the LAST '@' is login; the rest stays in host */
        t = strrchr(host, '@');
        if (t != NULL) {
            strncpy((char *) login, (char *) host, sizeof(login)-1);
            login[sizeof(login)-1] = '\0';
            t = strrchr(login, '@');
            *t = 0;
            strncpy((char *) host, t + 1, sizeof(host)-1);
            host[sizeof(host)-1] = '\0';
        }

        /* Is there any host information? (we should eventually parse it above) */
        if (*host == '[') {
            /* strip any IPA brackets. valid under IPv6. */
            dst = host;
            /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
            src = host;
            ++src;
            l = strlen(host);
            i = 1;
            /* shift the address one byte left, over the opening bracket */
            for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
                *dst = *src;
            }

            /* we moved in-place, so truncate the actual hostname found */
            *dst = '\0';
            ++dst;

            /* skip ahead to either start of port, or original EOS */
            while (*dst != '\0' && *dst != ':')
                ++dst;
            t = dst;
        } else {
            t = strrchr(host, ':');

            /* more than one ':' means an unbracketed IPv6 address, not a port */
            if (t != strchr(host,':') ) {
                /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
                /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
                /* therefore we MUST accept the case where they are not bracketed at all. */
                t = NULL;
            }
        }

        // Bug 3183 sanity check: If scheme is present, host must be too.
        if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
            debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
            return NULL;
        }

        /* t, if set, points at the ':' that introduces an explicit port */
        if (t && *t == ':') {
            *t = '\0';
            ++t;
            port = atoi(t);
        }
    }

    /* hostnames are case-insensitive: normalise to lower case */
    for (t = host; *t; ++t)
        *t = xtolower(*t);

    if (stringHasWhitespace(host)) {
        /* only the STRIP policy alters the host; other policies leave it as is */
        if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
            t = q = host;
            while (*t) {
                if (!xisspace(*t)) {
                    *q = *t;
                    ++q;
                }
                ++t;
            }
            *q = '\0';
        }
    }

    debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");

    /* optional strict check against the allowed hostname alphabet */
    if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
        debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
        return NULL;
    }

    /* Append the configured domain to bare names (no dot). */
    /* For IPV6 addresses also check for a colon */
    if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
        strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);

    /* remove trailing dots from hostnames */
    while ((l = strlen(host)) > 0 && host[--l] == '.')
        host[l] = '\0';

    /* reject duplicate or leading dots */
    if (strstr(host, "..") || *host == '.') {
        debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
        return NULL;
    }

    if (port < 1 || port > 65535) {
        debugs(23, 3, "urlParse: Invalid port '" << port << "'");
        return NULL;
    }

#if HARDCODE_DENY_PORTS
    /* These ports are filtered in the default squid.conf, but
     * maybe someone wants them hardcoded... */
    if (port == 7 || port == 9 || port == 19) {
        debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
        return NULL;
    }
#endif

    /* apply the configured whitespace policy to the path */
    if (stringHasWhitespace(urlpath)) {
        debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");

        switch (Config.uri_whitespace) {

        case URI_WHITESPACE_DENY:
            return NULL;

        case URI_WHITESPACE_ALLOW:
            break;

        case URI_WHITESPACE_ENCODE:
            /* replace the path with its escaped form */
            t = rfc1738_escape_unescaped(urlpath);
            xstrncpy(urlpath, t, MAX_URL);
            break;

        case URI_WHITESPACE_CHOP:
            /* cut the path at the first whitespace character */
            *(urlpath + strcspn(urlpath, w_space)) = '\0';
            break;

        case URI_WHITESPACE_STRIP:
        default:
            /* compact the path in place, dropping every whitespace byte */
            t = q = urlpath;
            while (*t) {
                if (!xisspace(*t)) {
                    *q = *t;
                    ++q;
                }
                ++t;
            }
            *q = '\0';
        }
    }

    return urlParseFinish(method, protocol, urlpath, host, login, port, request);
}
 428
 429 /**
 430  * Update request with parsed URI data.  If the request arg is
 431  * non-NULL, put parsed values there instead of allocating a new
 432  * HttpRequest.
 433  */
 434 static HttpRequest *
 435 urlParseFinish(const HttpRequestMethod& method,
 436                const AnyP::ProtocolType protocol,
 437                const char *const urlpath,
 438                const char *const host,
 439                const char *const login,
 440                const int port,
 441                HttpRequest *request)
 442 {
 443     if (NULL == request)
 444         request = new HttpRequest(method, protocol, urlpath);
 445     else {
 446         request->initHTTP(method, protocol, urlpath);
 447         safe_free(request->canonical);
 448     }
 449
 450     request->SetHost(host);
 451     xstrncpy(request->login, login, MAX_LOGIN_SZ);
 452     request->port = (unsigned short) port;
 453     return request;
 454 }
 455
 456 static HttpRequest *
 457 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
 458 {
 459     debugs(50, 5, "urnParse: " << urn);
 460     if (request) {
 461         request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
 462         safe_free(request->canonical);
 463         return request;
 464     }
 465
 466     return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
 467 }
 468
 469 const char *
 470 urlCanonical(HttpRequest * request)
 471 {
 472     LOCAL_ARRAY(char, portbuf, 32);
 473     LOCAL_ARRAY(char, urlbuf, MAX_URL);
 474
 475     if (request->canonical)
 476         return request->canonical;
 477
 478     if (request->url.getScheme() == AnyP::PROTO_URN) {
 479         snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
 480                  SQUIDSTRINGPRINT(request->urlpath));
 481     } else {
 482         switch (request->method.id()) {
 483
 484         case Http::METHOD_CONNECT:
 485             snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
 486             break;
 487
 488         default: {
 489             portbuf[0] = '\0';
 490
 491             if (request->port != urlDefaultPort(request->url.getScheme()))
 492                 snprintf(portbuf, 32, ":%d", request->port);
 493
 494             snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
 495                      request->url.getScheme().c_str(),
 496                      request->login,
 497                      *request->login ? "@" : null_string,
 498                      request->GetHost(),
 499                      portbuf,
 500                      SQUIDSTRINGPRINT(request->urlpath));
 501         }
 502         }
 503     }
 504
 505     return (request->canonical = xstrdup(urlbuf));
 506 }
 507
 508 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
 509  *        After copying it on in the first place! Would be less code to merge the two with a flag parameter.
 510  *        and never copy the query-string part in the first place
 511  */
 512 char *
 513 urlCanonicalClean(const HttpRequest * request)
 514 {
 515     LOCAL_ARRAY(char, buf, MAX_URL);
 516     LOCAL_ARRAY(char, portbuf, 32);
 517     LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
 518     char *t;
 519
 520     if (request->url.getScheme() == AnyP::PROTO_URN) {
 521         snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
 522                  SQUIDSTRINGPRINT(request->urlpath));
 523     } else {
 524         switch (request->method.id()) {
 525
 526         case Http::METHOD_CONNECT:
 527             snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
 528             break;
 529
 530         default: {
 531             portbuf[0] = '\0';
 532
 533             if (request->port != urlDefaultPort(request->url.getScheme()))
 534                 snprintf(portbuf, 32, ":%d", request->port);
 535
 536             loginbuf[0] = '\0';
 537
 538             if ((int) strlen(request->login) > 0) {
 539                 strcpy(loginbuf, request->login);
 540
 541                 if ((t = strchr(loginbuf, ':')))
 542                     *t = '\0';
 543
 544                 strcat(loginbuf, "@");
 545             }
 546
 547             snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
 548                      request->url.getScheme().c_str(),
 549                      loginbuf,
 550                      request->GetHost(),
 551                      portbuf,
 552                      SQUIDSTRINGPRINT(request->urlpath));
 553
 554             // strip arguments AFTER a question-mark
 555             if (Config.onoff.strip_query_terms)
 556                 if ((t = strchr(buf, '?')))
 557                     *(++t) = '\0';
 558         }
 559         }
 560     }
 561
 562     if (stringHasCntl(buf))
 563         xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
 564
 565     return buf;
 566 }
 567
 568 /**
 569  * Yet another alternative to urlCanonical.
 570  * This one adds the https:// parts to Http::METHOD_CONNECT URL
 571  * for use in error page outputs.
 572  * Luckily we can leverage the others instead of duplicating.
 573  */
 574 const char *
 575 urlCanonicalFakeHttps(const HttpRequest * request)
 576 {
 577     LOCAL_ARRAY(char, buf, MAX_URL);
 578
 579     // method CONNECT and port HTTPS
 580     if (request->method == Http::METHOD_CONNECT && request->port == 443) {
 581         snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
 582         return buf;
 583     }
 584
 585     // else do the normal complete canonical thing.
 586     return urlCanonicalClean(request);
 587 }
 588
 589 /*
 590  * Test if a URL is relative.
 591  *
 592  * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 593  * appear before a ':'.
 594  */
 595 bool
 596 urlIsRelative(const char *url)
 597 {
 598     const char *p;
 599
 600     if (url == NULL) {
 601         return (false);
 602     }
 603     if (*url == '\0') {
 604         return (false);
 605     }
 606
 607     for (p = url; *p != '\0' && *p != ':' && *p != '/'; ++p);
 608
 609     if (*p == ':') {
 610         return (false);
 611     }
 612     return (true);
 613 }
 614
 615 /*
 616  * Convert a relative URL to an absolute URL using the context of a given
 617  * request.
 618  *
 619  * It is assumed that you have already ensured that the URL is relative.
 620  *
 621  * If NULL is returned it is an indication that the method in use in the
 622  * request does not distinguish between relative and absolute and you should
 623  * use the url unchanged.
 624  *
 625  * If non-NULL is returned, it is up to the caller to free the resulting
 626  * memory using safe_free().
 627  */
 628 char *
 629 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
 630 {
 631
 632     if (req->method.id() == Http::METHOD_CONNECT) {
 633         return (NULL);
 634     }
 635
 636     char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
 637
 638     if (req->url.getScheme() == AnyP::PROTO_URN) {
 639         snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
 640                  SQUIDSTRINGPRINT(req->urlpath));
 641         return (urlbuf);
 642     }
 643
 644     size_t urllen;
 645
 646     if (req->port != urlDefaultPort(req->url.getScheme())) {
 647         urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
 648                           req->url.getScheme().c_str(),
 649                           req->login,
 650                           *req->login ? "@" : null_string,
 651                           req->GetHost(),
 652                           req->port
 653                          );
 654     } else {
 655         urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
 656                           req->url.getScheme().c_str(),
 657                           req->login,
 658                           *req->login ? "@" : null_string,
 659                           req->GetHost()
 660                          );
 661     }
 662
 663     if (relUrl[0] == '/') {
 664         strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 665     } else {
 666         const char *path = req->urlpath.termedBuf();
 667         const char *last_slash = strrchr(path, '/');
 668
 669         if (last_slash == NULL) {
 670             urlbuf[urllen] = '/';
 671             ++urllen;
 672             strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 673         } else {
 674             ++last_slash;
 675             size_t pathlen = last_slash - path;
 676             if (pathlen > MAX_URL - urllen - 1) {
 677                 pathlen = MAX_URL - urllen - 1;
 678             }
 679             strncpy(&urlbuf[urllen], path, pathlen);
 680             urllen += pathlen;
 681             if (urllen + 1 < MAX_URL) {
 682                 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
 683             }
 684         }
 685     }
 686
 687     return (urlbuf);
 688 }
 689
 690 /*
 691  * matchDomainName() compares a hostname with a domainname according
 692  * to the following rules:
 693  *
 694  *    HOST          DOMAIN        MATCH?
 695  * ------------- -------------    ------
 696  *    foo.com       foo.com         YES
 697  *   .foo.com       foo.com         YES
 698  *  x.foo.com       foo.com          NO
 699  *    foo.com      .foo.com         YES
 700  *   .foo.com      .foo.com         YES
 701  *  x.foo.com      .foo.com         YES
 702  *
 703  *  We strip leading dots on hosts (but not domains!) so that
 704  *  ".foo.com" is is always the same as "foo.com".
 705  *
 706  *  Return values:
 707  *     0 means the host matches the domain
 708  *     1 means the host is greater than the domain
 709  *    -1 means the host is less than the domain
 710  */
 711
 712 int
 713 matchDomainName(const char *h, const char *d)
 714 {
 715     int dl;
 716     int hl;
 717
 718     while ('.' == *h)
 719         ++h;
 720
 721     hl = strlen(h);
 722
 723     dl = strlen(d);
 724
 725     /*
 726      * Start at the ends of the two strings and work towards the
 727      * beginning.
 728      */
 729     while (xtolower(h[--hl]) == xtolower(d[--dl])) {
 730         if (hl == 0 && dl == 0) {
 731             /*
 732              * We made it all the way to the beginning of both
 733              * strings without finding any difference.
 734              */
 735             return 0;
 736         }
 737
 738         if (0 == hl) {
 739             /*
 740              * The host string is shorter than the domain string.
 741              * There is only one case when this can be a match.
 742              * If the domain is just one character longer, and if
 743              * that character is a leading '.' then we call it a
 744              * match.
 745              */
 746
 747             if (1 == dl && '.' == d[0])
 748                 return 0;
 749             else
 750                 return -1;
 751         }
 752
 753         if (0 == dl) {
 754             /*
 755              * The domain string is shorter than the host string.
 756              * This is a match only if the first domain character
 757              * is a leading '.'.
 758              */
 759
 760             if ('.' == d[0])
 761                 return 0;
 762             else
 763                 return 1;
 764         }
 765     }
 766
 767     /*
 768      * We found different characters in the same position (from the end).
 769      */
 770     /*
 771      * If one of those character is '.' then its special.  In order
 772      * for splay tree sorting to work properly, "x-foo.com" must
 773      * be greater than ".foo.com" even though '-' is less than '.'.
 774      */
 775     if ('.' == d[dl])
 776         return 1;
 777
 778     if ('.' == h[hl])
 779         return -1;
 780
 781     return (xtolower(h[hl]) - xtolower(d[dl]));
 782 }
 783
 784 /*
 785  * return true if we can serve requests for this method.
 786  */
 787 int
 788 urlCheckRequest(const HttpRequest * r)
 789 {
 790     int rc = 0;
 791     /* protocol "independent" methods
 792      *
 793      * actually these methods are specific to HTTP:
 794      * they are methods we recieve on our HTTP port,
 795      * and if we had a FTP listener would not be relevant
 796      * there.
 797      *
 798      * So, we should delegate them to HTTP. The problem is that we
 799      * do not have a default protocol from the client side of HTTP.
 800      */
 801
 802     if (r->method == Http::METHOD_CONNECT)
 803         return 1;
 804
 805     // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
 806     // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
 807     if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
 808         return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->urlpath != "*");
 809
 810     if (r->method == Http::METHOD_PURGE)
 811         return 1;
 812
 813     /* does method match the protocol? */
 814     switch (r->url.getScheme()) {
 815
 816     case AnyP::PROTO_URN:
 817
 818     case AnyP::PROTO_HTTP:
 819
 820     case AnyP::PROTO_CACHE_OBJECT:
 821         rc = 1;
 822         break;
 823
 824     case AnyP::PROTO_FTP:
 825
 826         if (r->method == Http::METHOD_PUT)
 827             rc = 1;
 828
 829     case AnyP::PROTO_GOPHER:
 830
 831     case AnyP::PROTO_WAIS:
 832
 833     case AnyP::PROTO_WHOIS:
 834         if (r->method == Http::METHOD_GET)
 835             rc = 1;
 836         else if (r->method == Http::METHOD_HEAD)
 837             rc = 1;
 838
 839         break;
 840
 841     case AnyP::PROTO_HTTPS:
 842 #if USE_OPENSSL
 843
 844         rc = 1;
 845
 846         break;
 847
 848 #else
 849         /*
 850         * Squid can't originate an SSL connection, so it should
 851         * never receive an "https:" URL.  It should always be
 852         * CONNECT instead.
 853         */
 854         rc = 0;
 855
 856 #endif
 857
 858     default:
 859         break;
 860     }
 861
 862     return rc;
 863 }
 864
 865 /*
 866  * Quick-n-dirty host extraction from a URL.  Steps:
 867  *      Look for a colon
 868  *      Skip any '/' after the colon
 869  *      Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
 870  *      Look for an ending '/' or ':' and terminate
 871  *      Look for login info preceeded by '@'
 872  */
 873
 874 class URLHostName
 875 {
 876
 877 public:
 878     char * extract(char const *url);
 879
 880 private:
 881     static char Host [SQUIDHOSTNAMELEN];
 882     void init(char const *);
 883     void findHostStart();
 884     void trimTrailingChars();
 885     void trimAuth();
 886     char const *hostStart;
 887     char const *url;
 888 };
 889
 890 char *
 891 urlHostname(const char *url)
 892 {
 893     return URLHostName().extract(url);
 894 }
 895
 896 char URLHostName::Host[SQUIDHOSTNAMELEN];
 897
 898 void
 899 URLHostName::init(char const *aUrl)
 900 {
 901     Host[0] = '\0';
 902     url = aUrl;
 903 }
 904
 905 void
 906 URLHostName::findHostStart()
 907 {
 908     if (NULL == (hostStart = strchr(url, ':')))
 909         return;
 910
 911     ++hostStart;
 912
 913     while (*hostStart != '\0' && *hostStart == '/')
 914         ++hostStart;
 915
 916     if (*hostStart == ']')
 917         ++hostStart;
 918 }
 919
 920 void
 921 URLHostName::trimTrailingChars()
 922 {
 923     char *t;
 924
 925     if ((t = strchr(Host, '/')))
 926         *t = '\0';
 927
 928     if ((t = strrchr(Host, ':')))
 929         *t = '\0';
 930
 931     if ((t = strchr(Host, ']')))
 932         *t = '\0';
 933 }
 934
 935 void
 936 URLHostName::trimAuth()
 937 {
 938     char *t;
 939
 940     if ((t = strrchr(Host, '@'))) {
 941         ++t;
 942         memmove(Host, t, strlen(t) + 1);
 943     }
 944 }
 945
 946 char *
 947 URLHostName::extract(char const *aUrl)
 948 {
 949     init(aUrl);
 950     findHostStart();
 951
 952     if (hostStart == NULL)
 953         return NULL;
 954
 955     xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
 956
 957     trimTrailingChars();
 958
 959     trimAuth();
 960
 961     return Host;
 962 }