]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Merge from trunk rev.14687
[thirdparty/squid.git] / src / url.cc
1 /*
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 23 URL Parsing */
10
11 #include "squid.h"
12 #include "globals.h"
13 #include "HttpRequest.h"
14 #include "rfc1738.h"
15 #include "SquidConfig.h"
16 #include "SquidString.h"
17 #include "URL.h"
18
19 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
20 const AnyP::ProtocolType protocol,
21 const char *const urlpath,
22 const char *const host,
23 const SBuf &login,
24 const int port,
25 HttpRequest *request);
26 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/// Characters accepted in a hostname when underscores are allowed
/// (selected by urlParse() when Config.onoff.allow_underscore is set).
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" // letters
    "0123456789"                                              // digits
    "-._"                                                     // punctuation, including '_'
    "[:]";                                                    // IPv6 literal syntax

/// Characters accepted in a hostname when underscores are NOT allowed.
/// Identical to valid_hostname_chars_u except that '_' is missing.
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" // letters
    "0123456789"                                              // digits
    "-."                                                      // punctuation
    "[:]";                                                    // IPv6 literal syntax
39
40 const SBuf &
41 URL::Asterisk()
42 {
43 static SBuf star("*");
44 return star;
45 }
46
47 const SBuf &
48 URL::SlashPath()
49 {
50 static SBuf slash("/");
51 return slash;
52 }
53
54 void
55 URL::host(const char *src)
56 {
57 hostAddr_.setEmpty();
58 hostAddr_ = src;
59 if (hostAddr_.isAnyAddr()) {
60 xstrncpy(host_, src, sizeof(host_));
61 hostIsNumeric_ = false;
62 } else {
63 hostAddr_.toHostStr(host_, sizeof(host_));
64 debugs(23, 3, "given IP: " << hostAddr_);
65 hostIsNumeric_ = 1;
66 }
67 touch();
68 }
69
70 const SBuf &
71 URL::path() const
72 {
73 // RFC 3986 section 3.3 says path can be empty (path-abempty).
74 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
75 // at least when sending and using. We must still accept path-abempty as input.
76 if (path_.isEmpty() && (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS))
77 return SlashPath();
78
79 return path_;
80 }
81
82 void
83 urlInitialize(void)
84 {
85 debugs(23, 5, "urlInitialize: Initializing...");
86 /* this ensures that the number of protocol strings is the same as
87 * the enum slots allocated because the last enum is always 'MAX'.
88 */
89 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
90 /*
91 * These test that our matchDomainName() function works the
92 * way we expect it to.
93 */
94 assert(0 == matchDomainName("foo.com", "foo.com"));
95 assert(0 == matchDomainName(".foo.com", "foo.com"));
96 assert(0 == matchDomainName("foo.com", ".foo.com"));
97 assert(0 == matchDomainName(".foo.com", ".foo.com"));
98 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
99 assert(0 != matchDomainName("x.foo.com", "foo.com"));
100 assert(0 != matchDomainName("foo.com", "x.foo.com"));
101 assert(0 != matchDomainName("bar.com", "foo.com"));
102 assert(0 != matchDomainName(".bar.com", "foo.com"));
103 assert(0 != matchDomainName(".bar.com", ".foo.com"));
104 assert(0 != matchDomainName("bar.com", ".foo.com"));
105 assert(0 < matchDomainName("zzz.com", "foo.com"));
106 assert(0 > matchDomainName("aaa.com", "foo.com"));
107 assert(0 == matchDomainName("FOO.com", "foo.COM"));
108 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
109 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
110 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
111 /* more cases? */
112 }
113
114 /**
115 * Parse the scheme name from string b, into protocol type.
116 * The string must be 0-terminated.
117 */
118 AnyP::ProtocolType
119 urlParseProtocol(const char *b)
120 {
121 // make e point to the ':' character
122 const char *e = b + strcspn(b, ":");
123 int len = e - b;
124
125 /* test common stuff first */
126
127 if (strncasecmp(b, "http", len) == 0)
128 return AnyP::PROTO_HTTP;
129
130 if (strncasecmp(b, "ftp", len) == 0)
131 return AnyP::PROTO_FTP;
132
133 if (strncasecmp(b, "https", len) == 0)
134 return AnyP::PROTO_HTTPS;
135
136 if (strncasecmp(b, "file", len) == 0)
137 return AnyP::PROTO_FTP;
138
139 if (strncasecmp(b, "coap", len) == 0)
140 return AnyP::PROTO_COAP;
141
142 if (strncasecmp(b, "coaps", len) == 0)
143 return AnyP::PROTO_COAPS;
144
145 if (strncasecmp(b, "gopher", len) == 0)
146 return AnyP::PROTO_GOPHER;
147
148 if (strncasecmp(b, "wais", len) == 0)
149 return AnyP::PROTO_WAIS;
150
151 if (strncasecmp(b, "cache_object", len) == 0)
152 return AnyP::PROTO_CACHE_OBJECT;
153
154 if (strncasecmp(b, "urn", len) == 0)
155 return AnyP::PROTO_URN;
156
157 if (strncasecmp(b, "whois", len) == 0)
158 return AnyP::PROTO_WHOIS;
159
160 return AnyP::PROTO_NONE;
161 }
162
163 /*
164 * Parse a URI/URL.
165 *
166 * If the 'request' arg is non-NULL, put parsed values there instead
167 * of allocating a new HttpRequest.
168 *
169 * This abuses HttpRequest as a way of representing the parsed url
170 * and its components.
171 * method is used to switch parsers and to init the HttpRequest.
172 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
173 * looked for.
174 * The url is non const so that if its too long we can NULL-terminate it in place.
175 */
176
177 /*
178 * This routine parses a URL. Its assumed that the URL is complete -
179 * ie, the end of the string is the end of the URL. Don't pass a partial
180 * URL here as this routine doesn't have any way of knowing whether
181 * its partial or not (ie, it handles the case of no trailing slash as
182 * being "end of host with implied path of /".
183 */
184 HttpRequest *
185 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
186 {
187 LOCAL_ARRAY(char, proto, MAX_URL);
188 LOCAL_ARRAY(char, login, MAX_URL);
189 LOCAL_ARRAY(char, host, MAX_URL);
190 LOCAL_ARRAY(char, urlpath, MAX_URL);
191 char *t = NULL;
192 char *q = NULL;
193 int port;
194 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
195 int l;
196 int i;
197 const char *src;
198 char *dst;
199 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
200
201 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
202 /* terminate so it doesn't overflow other buffers */
203 *(url + (MAX_URL >> 1)) = '\0';
204 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
205 return NULL;
206 }
207 if (method == Http::METHOD_CONNECT) {
208 port = CONNECT_PORT;
209
210 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
211 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
212 return NULL;
213
214 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
215 URL::Asterisk().cmp(url) == 0) {
216 protocol = AnyP::PROTO_HTTP;
217 port = AnyP::UriScheme(protocol).defaultPort();
218 return urlParseFinish(method, protocol, url, host, SBuf(), port, request);
219 } else if (!strncmp(url, "urn:", 4)) {
220 return urnParse(method, url, request);
221 } else {
222 /* Parse the URL: */
223 src = url;
224 i = 0;
225 /* Find first : - everything before is protocol */
226 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
227 *dst = *src;
228 }
229 if (i >= l)
230 return NULL;
231 *dst = '\0';
232
233 /* Then its :// */
234 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
235 return NULL;
236 i += 3;
237 src += 3;
238
239 /* Then everything until first /; thats host (and port; which we'll look for here later) */
240 // bug 1881: If we don't get a "/" then we imply it was there
241 // bug 3074: We could just be given a "?" or "#". These also imply "/"
242 // bug 3233: whitespace is also a hostname delimiter.
243 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
244 *dst = *src;
245 }
246
247 /*
248 * We can't check for "i >= l" here because we could be at the end of the line
249 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
250 * been -given- a valid URL and the path is just '/'.
251 */
252 if (i > l)
253 return NULL;
254 *dst = '\0';
255
256 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
257 if (*src == '?' || *src == '#' || *src == '\0') {
258 urlpath[0] = '/';
259 dst = &urlpath[1];
260 } else {
261 dst = urlpath;
262 }
263 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
264 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
265 *dst = *src;
266 }
267
268 /* We -could- be at the end of the buffer here */
269 if (i > l)
270 return NULL;
271 /* If the URL path is empty we set it to be "/" */
272 if (dst == urlpath) {
273 *dst = '/';
274 ++dst;
275 }
276 *dst = '\0';
277
278 protocol = urlParseProtocol(proto);
279 port = AnyP::UriScheme(protocol).defaultPort();
280
281 /* Is there any login information? (we should eventually parse it above) */
282 t = strrchr(host, '@');
283 if (t != NULL) {
284 strncpy((char *) login, (char *) host, sizeof(login)-1);
285 login[sizeof(login)-1] = '\0';
286 t = strrchr(login, '@');
287 *t = 0;
288 strncpy((char *) host, t + 1, sizeof(host)-1);
289 host[sizeof(host)-1] = '\0';
290 // Bug 4498: URL-unescape the login info after extraction
291 rfc1738_unescape(login);
292 }
293
294 /* Is there any host information? (we should eventually parse it above) */
295 if (*host == '[') {
296 /* strip any IPA brackets. valid under IPv6. */
297 dst = host;
298 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
299 src = host;
300 ++src;
301 l = strlen(host);
302 i = 1;
303 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
304 *dst = *src;
305 }
306
307 /* we moved in-place, so truncate the actual hostname found */
308 *dst = '\0';
309 ++dst;
310
311 /* skip ahead to either start of port, or original EOS */
312 while (*dst != '\0' && *dst != ':')
313 ++dst;
314 t = dst;
315 } else {
316 t = strrchr(host, ':');
317
318 if (t != strchr(host,':') ) {
319 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
320 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
321 /* therefore we MUST accept the case where they are not bracketed at all. */
322 t = NULL;
323 }
324 }
325
326 // Bug 3183 sanity check: If scheme is present, host must be too.
327 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
328 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
329 return NULL;
330 }
331
332 if (t && *t == ':') {
333 *t = '\0';
334 ++t;
335 port = atoi(t);
336 }
337 }
338
339 for (t = host; *t; ++t)
340 *t = xtolower(*t);
341
342 if (stringHasWhitespace(host)) {
343 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
344 t = q = host;
345 while (*t) {
346 if (!xisspace(*t)) {
347 *q = *t;
348 ++q;
349 }
350 ++t;
351 }
352 *q = '\0';
353 }
354 }
355
356 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
357
358 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
359 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
360 return NULL;
361 }
362
363 /* For IPV6 addresses also check for a colon */
364 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
365 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
366
367 /* remove trailing dots from hostnames */
368 while ((l = strlen(host)) > 0 && host[--l] == '.')
369 host[l] = '\0';
370
371 /* reject duplicate or leading dots */
372 if (strstr(host, "..") || *host == '.') {
373 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
374 return NULL;
375 }
376
377 if (port < 1 || port > 65535) {
378 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
379 return NULL;
380 }
381
382 #if HARDCODE_DENY_PORTS
383 /* These ports are filtered in the default squid.conf, but
384 * maybe someone wants them hardcoded... */
385 if (port == 7 || port == 9 || port == 19) {
386 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
387 return NULL;
388 }
389 #endif
390
391 if (stringHasWhitespace(urlpath)) {
392 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
393
394 switch (Config.uri_whitespace) {
395
396 case URI_WHITESPACE_DENY:
397 return NULL;
398
399 case URI_WHITESPACE_ALLOW:
400 break;
401
402 case URI_WHITESPACE_ENCODE:
403 t = rfc1738_escape_unescaped(urlpath);
404 xstrncpy(urlpath, t, MAX_URL);
405 break;
406
407 case URI_WHITESPACE_CHOP:
408 *(urlpath + strcspn(urlpath, w_space)) = '\0';
409 break;
410
411 case URI_WHITESPACE_STRIP:
412 default:
413 t = q = urlpath;
414 while (*t) {
415 if (!xisspace(*t)) {
416 *q = *t;
417 ++q;
418 }
419 ++t;
420 }
421 *q = '\0';
422 }
423 }
424
425 return urlParseFinish(method, protocol, urlpath, host, SBuf(login), port, request);
426 }
427
428 /**
429 * Update request with parsed URI data. If the request arg is
430 * non-NULL, put parsed values there instead of allocating a new
431 * HttpRequest.
432 */
433 static HttpRequest *
434 urlParseFinish(const HttpRequestMethod& method,
435 const AnyP::ProtocolType protocol,
436 const char *const urlpath,
437 const char *const host,
438 const SBuf &login,
439 const int port,
440 HttpRequest *request)
441 {
442 if (NULL == request)
443 request = new HttpRequest(method, protocol, urlpath);
444 else {
445 request->initHTTP(method, protocol, urlpath);
446 }
447
448 request->url.host(host);
449 request->url.userInfo(login);
450 request->url.port(port);
451 return request;
452 }
453
454 static HttpRequest *
455 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
456 {
457 debugs(50, 5, "urnParse: " << urn);
458 if (request) {
459 request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
460 return request;
461 }
462
463 return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
464 }
465
466 void
467 URL::touch()
468 {
469 absolute_.clear();
470 authorityHttp_.clear();
471 authorityWithPort_.clear();
472 }
473
474 SBuf &
475 URL::authority(bool requirePort) const
476 {
477 if (authorityHttp_.isEmpty()) {
478
479 // both formats contain Host/IP
480 authorityWithPort_.append(host());
481 authorityHttp_ = authorityWithPort_;
482
483 // authorityForm_ only has :port if it is non-default
484 authorityWithPort_.appendf(":%u",port());
485 if (port() != getScheme().defaultPort())
486 authorityHttp_ = authorityWithPort_;
487 }
488
489 return requirePort ? authorityWithPort_ : authorityHttp_;
490 }
491
492 SBuf &
493 URL::absolute() const
494 {
495 if (absolute_.isEmpty()) {
496 // TODO: most URL will be much shorter, avoid allocating this much
497 absolute_.reserveCapacity(MAX_URL);
498
499 absolute_.appendf("%s:", getScheme().c_str());
500 if (getScheme() != AnyP::PROTO_URN) {
501 absolute_.append("//", 2);
502 const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
503 getScheme() != AnyP::PROTO_HTTPS ||
504 userInfo().isEmpty();
505 if (!omitUserInfo) {
506 absolute_.append(userInfo());
507 absolute_.append("@", 1);
508 }
509 absolute_.append(authority());
510 }
511 absolute_.append(path());
512 }
513
514 return absolute_;
515 }
516
517 /** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
518 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
519 * and never copy the query-string part in the first place
520 */
521 char *
522 urlCanonicalClean(const HttpRequest * request)
523 {
524 LOCAL_ARRAY(char, buf, MAX_URL);
525
526 snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
527 buf[sizeof(buf)-1] = '\0';
528
529 // URN, CONNECT method, and non-stripped URIs can go straight out
530 if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
531 // strip anything AFTER a question-mark
532 // leaving the '?' in place
533 if (auto t = strchr(buf, '?')) {
534 *(++t) = '\0';
535 }
536 }
537
538 if (stringHasCntl(buf))
539 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
540
541 return buf;
542 }
543
544 /**
545 * Yet another alternative to urlCanonical.
546 * This one adds the https:// parts to Http::METHOD_CONNECT URL
547 * for use in error page outputs.
548 * Luckily we can leverage the others instead of duplicating.
549 */
550 const char *
551 urlCanonicalFakeHttps(const HttpRequest * request)
552 {
553 LOCAL_ARRAY(char, buf, MAX_URL);
554
555 // method CONNECT and port HTTPS
556 if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
557 snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
558 return buf;
559 }
560
561 // else do the normal complete canonical thing.
562 return urlCanonicalClean(request);
563 }
564
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'.
 *
 * Returns false for a NULL or empty string and for any string whose first
 * ':' comes before its first '/'; returns true otherwise.
 */
bool
urlIsRelative(const char *url)
{
    if (url == NULL || *url == '\0')
        return false;

    // whichever of ':' and '/' shows up first decides the answer
    const char *firstDelimiter = strpbrk(url, ":/");

    return !(firstDelimiter != NULL && *firstDelimiter == ':');
}
590
591 /*
592 * Convert a relative URL to an absolute URL using the context of a given
593 * request.
594 *
595 * It is assumed that you have already ensured that the URL is relative.
596 *
597 * If NULL is returned it is an indication that the method in use in the
598 * request does not distinguish between relative and absolute and you should
599 * use the url unchanged.
600 *
601 * If non-NULL is returned, it is up to the caller to free the resulting
602 * memory using safe_free().
603 */
604 char *
605 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
606 {
607
608 if (req->method.id() == Http::METHOD_CONNECT) {
609 return (NULL);
610 }
611
612 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
613
614 if (req->url.getScheme() == AnyP::PROTO_URN) {
615 // XXX: this is what the original code did, but it seems to break the
616 // intended behaviour of this function. It returns the stored URN path,
617 // not converting the given one into a URN...
618 snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
619 return (urlbuf);
620 }
621
622 SBuf authorityForm = req->url.authority(); // host[:port]
623 size_t urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
624 req->url.getScheme().c_str(),
625 SQUIDSBUFPRINT(req->url.userInfo()),
626 !req->url.userInfo().isEmpty() ? "@" : "",
627 SQUIDSBUFPRINT(authorityForm));
628
629 // if the first char is '/' assume its a relative path
630 // XXX: this breaks on scheme-relative URLs,
631 // but we should not see those outside ESI, and rarely there.
632 // XXX: also breaks on any URL containing a '/' in the query-string portion
633 if (relUrl[0] == '/') {
634 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
635 } else {
636 SBuf path = req->url.path();
637 SBuf::size_type lastSlashPos = path.rfind('/');
638
639 if (lastSlashPos == SBuf::npos) {
640 // replace the whole path with the given bit(s)
641 urlbuf[urllen] = '/';
642 ++urllen;
643 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
644 } else {
645 // replace only the last (file?) segment with the given bit(s)
646 ++lastSlashPos;
647 if (lastSlashPos > MAX_URL - urllen - 1) {
648 // XXX: crops bits in the middle of the combined URL.
649 lastSlashPos = MAX_URL - urllen - 1;
650 }
651 SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
652 urllen += lastSlashPos;
653 if (urllen + 1 < MAX_URL) {
654 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
655 }
656 }
657 }
658
659 return (urlbuf);
660 }
661
662 int
663 matchDomainName(const char *h, const char *d, bool honorWildcards)
664 {
665 int dl;
666 int hl;
667
668 while ('.' == *h)
669 ++h;
670
671 hl = strlen(h);
672
673 dl = strlen(d);
674
675 /*
676 * Start at the ends of the two strings and work towards the
677 * beginning.
678 */
679 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
680 if (hl == 0 && dl == 0) {
681 /*
682 * We made it all the way to the beginning of both
683 * strings without finding any difference.
684 */
685 return 0;
686 }
687
688 if (0 == hl) {
689 /*
690 * The host string is shorter than the domain string.
691 * There is only one case when this can be a match.
692 * If the domain is just one character longer, and if
693 * that character is a leading '.' then we call it a
694 * match.
695 */
696
697 if (1 == dl && '.' == d[0])
698 return 0;
699 else
700 return -1;
701 }
702
703 if (0 == dl) {
704 /*
705 * The domain string is shorter than the host string.
706 * This is a match only if the first domain character
707 * is a leading '.'.
708 */
709
710 if ('.' == d[0])
711 return 0;
712 else
713 return 1;
714 }
715 }
716
717 /*
718 * We found different characters in the same position (from the end).
719 */
720
721 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
722 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
723 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
724 if (honorWildcards && h[hl] == '*' && h[hl + 1] == '.')
725 return 0;
726
727 /*
728 * If one of those character is '.' then its special. In order
729 * for splay tree sorting to work properly, "x-foo.com" must
730 * be greater than ".foo.com" even though '-' is less than '.'.
731 */
732 if ('.' == d[dl])
733 return 1;
734
735 if ('.' == h[hl])
736 return -1;
737
738 return (xtolower(h[hl]) - xtolower(d[dl]));
739 }
740
741 /*
742 * return true if we can serve requests for this method.
743 */
744 int
745 urlCheckRequest(const HttpRequest * r)
746 {
747 int rc = 0;
748 /* protocol "independent" methods
749 *
750 * actually these methods are specific to HTTP:
751 * they are methods we recieve on our HTTP port,
752 * and if we had a FTP listener would not be relevant
753 * there.
754 *
755 * So, we should delegate them to HTTP. The problem is that we
756 * do not have a default protocol from the client side of HTTP.
757 */
758
759 if (r->method == Http::METHOD_CONNECT)
760 return 1;
761
762 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
763 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
764 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
765 return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
766
767 if (r->method == Http::METHOD_PURGE)
768 return 1;
769
770 /* does method match the protocol? */
771 switch (r->url.getScheme()) {
772
773 case AnyP::PROTO_URN:
774
775 case AnyP::PROTO_HTTP:
776
777 case AnyP::PROTO_CACHE_OBJECT:
778 rc = 1;
779 break;
780
781 case AnyP::PROTO_FTP:
782
783 if (r->method == Http::METHOD_PUT)
784 rc = 1;
785
786 case AnyP::PROTO_GOPHER:
787
788 case AnyP::PROTO_WAIS:
789
790 case AnyP::PROTO_WHOIS:
791 if (r->method == Http::METHOD_GET)
792 rc = 1;
793 else if (r->method == Http::METHOD_HEAD)
794 rc = 1;
795
796 break;
797
798 case AnyP::PROTO_HTTPS:
799 #if USE_OPENSSL
800
801 rc = 1;
802
803 break;
804
805 #else
806 /*
807 * Squid can't originate an SSL connection, so it should
808 * never receive an "https:" URL. It should always be
809 * CONNECT instead.
810 */
811 rc = 0;
812
813 #endif
814
815 default:
816 break;
817 }
818
819 return rc;
820 }
821
822 /*
823 * Quick-n-dirty host extraction from a URL. Steps:
824 * Look for a colon
825 * Skip any '/' after the colon
826 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
827 * Look for an ending '/' or ':' and terminate
828 * Look for login info preceeded by '@'
829 */
830
831 class URLHostName
832 {
833
834 public:
835 char * extract(char const *url);
836
837 private:
838 static char Host [SQUIDHOSTNAMELEN];
839 void init(char const *);
840 void findHostStart();
841 void trimTrailingChars();
842 void trimAuth();
843 char const *hostStart;
844 char const *url;
845 };
846
847 char *
848 urlHostname(const char *url)
849 {
850 return URLHostName().extract(url);
851 }
852
853 char URLHostName::Host[SQUIDHOSTNAMELEN];
854
855 void
856 URLHostName::init(char const *aUrl)
857 {
858 Host[0] = '\0';
859 url = aUrl;
860 }
861
862 void
863 URLHostName::findHostStart()
864 {
865 if (NULL == (hostStart = strchr(url, ':')))
866 return;
867
868 ++hostStart;
869
870 while (*hostStart != '\0' && *hostStart == '/')
871 ++hostStart;
872
873 if (*hostStart == ']')
874 ++hostStart;
875 }
876
877 void
878 URLHostName::trimTrailingChars()
879 {
880 char *t;
881
882 if ((t = strchr(Host, '/')))
883 *t = '\0';
884
885 if ((t = strrchr(Host, ':')))
886 *t = '\0';
887
888 if ((t = strchr(Host, ']')))
889 *t = '\0';
890 }
891
892 void
893 URLHostName::trimAuth()
894 {
895 char *t;
896
897 if ((t = strrchr(Host, '@'))) {
898 ++t;
899 memmove(Host, t, strlen(t) + 1);
900 }
901 }
902
903 char *
904 URLHostName::extract(char const *aUrl)
905 {
906 init(aUrl);
907 findHostStart();
908
909 if (hostStart == NULL)
910 return NULL;
911
912 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
913
914 trimTrailingChars();
915
916 trimAuth();
917
918 return Host;
919 }
920
921 URL::URL(AnyP::UriScheme const &aScheme) :
922 scheme_(aScheme),
923 hostIsNumeric_(false),
924 port_(0)
925 {
926 *host_=0;
927 }
928