]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Cleanup: remove many needless references to SSL
[thirdparty/squid.git] / src / url.cc
1 /*
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 23 URL Parsing */
10
#include "squid.h"
#include "globals.h"
#include "HttpRequest.h"
#include "rfc1738.h"
#include "SquidConfig.h"
#include "SquidString.h"
#include "URL.h"

#include <cctype>
18
19 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
20 const AnyP::ProtocolType protocol,
21 const char *const protoStr,
22 const char *const urlpath,
23 const char *const host,
24 const SBuf &login,
25 const int port,
26 HttpRequest *request);
27 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/// Characters urlParse() accepts in a hostname when underscores are allowed.
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    "-._"
    "[:]";

/// Characters urlParse() accepts in a hostname when underscores are not allowed.
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
    "-."
    "[:]";
40
41 const SBuf &
42 URL::Asterisk()
43 {
44 static SBuf star("*");
45 return star;
46 }
47
48 const SBuf &
49 URL::SlashPath()
50 {
51 static SBuf slash("/");
52 return slash;
53 }
54
55 void
56 URL::host(const char *src)
57 {
58 hostAddr_.setEmpty();
59 hostAddr_ = src;
60 if (hostAddr_.isAnyAddr()) {
61 xstrncpy(host_, src, sizeof(host_));
62 hostIsNumeric_ = false;
63 } else {
64 hostAddr_.toHostStr(host_, sizeof(host_));
65 debugs(23, 3, "given IP: " << hostAddr_);
66 hostIsNumeric_ = 1;
67 }
68 touch();
69 }
70
71 const SBuf &
72 URL::path() const
73 {
74 // RFC 3986 section 3.3 says path can be empty (path-abempty).
75 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
76 // at least when sending and using. We must still accept path-abempty as input.
77 if (path_.isEmpty() && (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS))
78 return SlashPath();
79
80 return path_;
81 }
82
83 void
84 urlInitialize(void)
85 {
86 debugs(23, 5, "urlInitialize: Initializing...");
87 /* this ensures that the number of protocol strings is the same as
88 * the enum slots allocated because the last enum is always 'MAX'.
89 */
90 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
91 /*
92 * These test that our matchDomainName() function works the
93 * way we expect it to.
94 */
95 assert(0 == matchDomainName("foo.com", "foo.com"));
96 assert(0 == matchDomainName(".foo.com", "foo.com"));
97 assert(0 == matchDomainName("foo.com", ".foo.com"));
98 assert(0 == matchDomainName(".foo.com", ".foo.com"));
99 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
100 assert(0 != matchDomainName("x.foo.com", "foo.com"));
101 assert(0 != matchDomainName("foo.com", "x.foo.com"));
102 assert(0 != matchDomainName("bar.com", "foo.com"));
103 assert(0 != matchDomainName(".bar.com", "foo.com"));
104 assert(0 != matchDomainName(".bar.com", ".foo.com"));
105 assert(0 != matchDomainName("bar.com", ".foo.com"));
106 assert(0 < matchDomainName("zzz.com", "foo.com"));
107 assert(0 > matchDomainName("aaa.com", "foo.com"));
108 assert(0 == matchDomainName("FOO.com", "foo.COM"));
109 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
110 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
111 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
112 /* more cases? */
113 }
114
115 /**
116 * Parse the scheme name from string b, into protocol type.
117 * The string must be 0-terminated.
118 */
119 AnyP::ProtocolType
120 urlParseProtocol(const char *b)
121 {
122 // make e point to the ':' character
123 const char *e = b + strcspn(b, ":");
124 int len = e - b;
125
126 /* test common stuff first */
127
128 if (strncasecmp(b, "http", len) == 0)
129 return AnyP::PROTO_HTTP;
130
131 if (strncasecmp(b, "ftp", len) == 0)
132 return AnyP::PROTO_FTP;
133
134 if (strncasecmp(b, "https", len) == 0)
135 return AnyP::PROTO_HTTPS;
136
137 if (strncasecmp(b, "file", len) == 0)
138 return AnyP::PROTO_FTP;
139
140 if (strncasecmp(b, "coap", len) == 0)
141 return AnyP::PROTO_COAP;
142
143 if (strncasecmp(b, "coaps", len) == 0)
144 return AnyP::PROTO_COAPS;
145
146 if (strncasecmp(b, "gopher", len) == 0)
147 return AnyP::PROTO_GOPHER;
148
149 if (strncasecmp(b, "wais", len) == 0)
150 return AnyP::PROTO_WAIS;
151
152 if (strncasecmp(b, "cache_object", len) == 0)
153 return AnyP::PROTO_CACHE_OBJECT;
154
155 if (strncasecmp(b, "urn", len) == 0)
156 return AnyP::PROTO_URN;
157
158 if (strncasecmp(b, "whois", len) == 0)
159 return AnyP::PROTO_WHOIS;
160
161 if (len > 0)
162 return AnyP::PROTO_UNKNOWN;
163
164 return AnyP::PROTO_NONE;
165 }
166
167 /*
168 * Parse a URI/URL.
169 *
170 * If the 'request' arg is non-NULL, put parsed values there instead
171 * of allocating a new HttpRequest.
172 *
173 * This abuses HttpRequest as a way of representing the parsed url
174 * and its components.
175 * method is used to switch parsers and to init the HttpRequest.
176 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
177 * looked for.
178 * The url is non const so that if its too long we can NULL-terminate it in place.
179 */
180
181 /*
182 * This routine parses a URL. Its assumed that the URL is complete -
183 * ie, the end of the string is the end of the URL. Don't pass a partial
184 * URL here as this routine doesn't have any way of knowing whether
185 * its partial or not (ie, it handles the case of no trailing slash as
186 * being "end of host with implied path of /".
187 */
188 HttpRequest *
189 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
190 {
191 LOCAL_ARRAY(char, proto, MAX_URL);
192 LOCAL_ARRAY(char, login, MAX_URL);
193 LOCAL_ARRAY(char, host, MAX_URL);
194 LOCAL_ARRAY(char, urlpath, MAX_URL);
195 char *t = NULL;
196 char *q = NULL;
197 int port;
198 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
199 int l;
200 int i;
201 const char *src;
202 char *dst;
203 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
204
205 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
206 /* terminate so it doesn't overflow other buffers */
207 *(url + (MAX_URL >> 1)) = '\0';
208 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
209 return NULL;
210 }
211 if (method == Http::METHOD_CONNECT) {
212 port = CONNECT_PORT;
213
214 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
215 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
216 return NULL;
217
218 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
219 URL::Asterisk().cmp(url) == 0) {
220 protocol = AnyP::PROTO_HTTP;
221 port = 80; // or the slow way ... AnyP::UriScheme(protocol,"http").defaultPort();
222 return urlParseFinish(method, protocol, "http", url, host, SBuf(), port, request);
223 } else if (!strncmp(url, "urn:", 4)) {
224 return urnParse(method, url, request);
225 } else {
226 /* Parse the URL: */
227 src = url;
228 i = 0;
229 /* Find first : - everything before is protocol */
230 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
231 *dst = *src;
232 }
233 if (i >= l)
234 return NULL;
235 *dst = '\0';
236
237 /* Then its :// */
238 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
239 return NULL;
240 i += 3;
241 src += 3;
242
243 /* Then everything until first /; thats host (and port; which we'll look for here later) */
244 // bug 1881: If we don't get a "/" then we imply it was there
245 // bug 3074: We could just be given a "?" or "#". These also imply "/"
246 // bug 3233: whitespace is also a hostname delimiter.
247 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
248 *dst = *src;
249 }
250
251 /*
252 * We can't check for "i >= l" here because we could be at the end of the line
253 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
254 * been -given- a valid URL and the path is just '/'.
255 */
256 if (i > l)
257 return NULL;
258 *dst = '\0';
259
260 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
261 if (*src == '?' || *src == '#' || *src == '\0') {
262 urlpath[0] = '/';
263 dst = &urlpath[1];
264 } else {
265 dst = urlpath;
266 }
267 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
268 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
269 *dst = *src;
270 }
271
272 /* We -could- be at the end of the buffer here */
273 if (i > l)
274 return NULL;
275 /* If the URL path is empty we set it to be "/" */
276 if (dst == urlpath) {
277 *dst = '/';
278 ++dst;
279 }
280 *dst = '\0';
281
282 protocol = urlParseProtocol(proto);
283 port = AnyP::UriScheme(protocol).defaultPort();
284
285 /* Is there any login information? (we should eventually parse it above) */
286 t = strrchr(host, '@');
287 if (t != NULL) {
288 strncpy((char *) login, (char *) host, sizeof(login)-1);
289 login[sizeof(login)-1] = '\0';
290 t = strrchr(login, '@');
291 *t = 0;
292 strncpy((char *) host, t + 1, sizeof(host)-1);
293 host[sizeof(host)-1] = '\0';
294 // Bug 4498: URL-unescape the login info after extraction
295 rfc1738_unescape(login);
296 }
297
298 /* Is there any host information? (we should eventually parse it above) */
299 if (*host == '[') {
300 /* strip any IPA brackets. valid under IPv6. */
301 dst = host;
302 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
303 src = host;
304 ++src;
305 l = strlen(host);
306 i = 1;
307 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
308 *dst = *src;
309 }
310
311 /* we moved in-place, so truncate the actual hostname found */
312 *dst = '\0';
313 ++dst;
314
315 /* skip ahead to either start of port, or original EOS */
316 while (*dst != '\0' && *dst != ':')
317 ++dst;
318 t = dst;
319 } else {
320 t = strrchr(host, ':');
321
322 if (t != strchr(host,':') ) {
323 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
324 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
325 /* therefore we MUST accept the case where they are not bracketed at all. */
326 t = NULL;
327 }
328 }
329
330 // Bug 3183 sanity check: If scheme is present, host must be too.
331 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
332 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
333 return NULL;
334 }
335
336 if (t && *t == ':') {
337 *t = '\0';
338 ++t;
339 port = atoi(t);
340 }
341 }
342
343 for (t = host; *t; ++t)
344 *t = xtolower(*t);
345
346 if (stringHasWhitespace(host)) {
347 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
348 t = q = host;
349 while (*t) {
350 if (!xisspace(*t)) {
351 *q = *t;
352 ++q;
353 }
354 ++t;
355 }
356 *q = '\0';
357 }
358 }
359
360 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
361
362 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
363 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
364 return NULL;
365 }
366
367 /* For IPV6 addresses also check for a colon */
368 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
369 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
370
371 /* remove trailing dots from hostnames */
372 while ((l = strlen(host)) > 0 && host[--l] == '.')
373 host[l] = '\0';
374
375 /* reject duplicate or leading dots */
376 if (strstr(host, "..") || *host == '.') {
377 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
378 return NULL;
379 }
380
381 if (port < 1 || port > 65535) {
382 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
383 return NULL;
384 }
385
386 #if HARDCODE_DENY_PORTS
387 /* These ports are filtered in the default squid.conf, but
388 * maybe someone wants them hardcoded... */
389 if (port == 7 || port == 9 || port == 19) {
390 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
391 return NULL;
392 }
393 #endif
394
395 if (stringHasWhitespace(urlpath)) {
396 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
397
398 switch (Config.uri_whitespace) {
399
400 case URI_WHITESPACE_DENY:
401 return NULL;
402
403 case URI_WHITESPACE_ALLOW:
404 break;
405
406 case URI_WHITESPACE_ENCODE:
407 t = rfc1738_escape_unescaped(urlpath);
408 xstrncpy(urlpath, t, MAX_URL);
409 break;
410
411 case URI_WHITESPACE_CHOP:
412 *(urlpath + strcspn(urlpath, w_space)) = '\0';
413 break;
414
415 case URI_WHITESPACE_STRIP:
416 default:
417 t = q = urlpath;
418 while (*t) {
419 if (!xisspace(*t)) {
420 *q = *t;
421 ++q;
422 }
423 ++t;
424 }
425 *q = '\0';
426 }
427 }
428
429 return urlParseFinish(method, protocol, proto, urlpath, host, SBuf(login), port, request);
430 }
431
432 /**
433 * Update request with parsed URI data. If the request arg is
434 * non-NULL, put parsed values there instead of allocating a new
435 * HttpRequest.
436 */
437 static HttpRequest *
438 urlParseFinish(const HttpRequestMethod& method,
439 const AnyP::ProtocolType protocol,
440 const char *const protoStr, // for unknown protocols
441 const char *const urlpath,
442 const char *const host,
443 const SBuf &login,
444 const int port,
445 HttpRequest *request)
446 {
447 if (NULL == request)
448 request = new HttpRequest(method, protocol, protoStr, urlpath);
449 else {
450 request->initHTTP(method, protocol, protoStr, urlpath);
451 }
452
453 request->url.host(host);
454 request->url.userInfo(login);
455 request->url.port(port);
456 return request;
457 }
458
459 static HttpRequest *
460 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
461 {
462 debugs(50, 5, "urnParse: " << urn);
463 if (request) {
464 request->initHTTP(method, AnyP::PROTO_URN, "urn", urn + 4);
465 return request;
466 }
467
468 return new HttpRequest(method, AnyP::PROTO_URN, "urn", urn + 4);
469 }
470
471 void
472 URL::touch()
473 {
474 absolute_.clear();
475 authorityHttp_.clear();
476 authorityWithPort_.clear();
477 }
478
479 SBuf &
480 URL::authority(bool requirePort) const
481 {
482 if (authorityHttp_.isEmpty()) {
483
484 // both formats contain Host/IP
485 authorityWithPort_.append(host());
486 authorityHttp_ = authorityWithPort_;
487
488 // authorityForm_ only has :port if it is non-default
489 authorityWithPort_.appendf(":%u",port());
490 if (port() != getScheme().defaultPort())
491 authorityHttp_ = authorityWithPort_;
492 }
493
494 return requirePort ? authorityWithPort_ : authorityHttp_;
495 }
496
497 SBuf &
498 URL::absolute() const
499 {
500 if (absolute_.isEmpty()) {
501 // TODO: most URL will be much shorter, avoid allocating this much
502 absolute_.reserveCapacity(MAX_URL);
503
504 absolute_.append(getScheme().image());
505 absolute_.append(":",1);
506 if (getScheme() != AnyP::PROTO_URN) {
507 absolute_.append("//", 2);
508 const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
509 getScheme() != AnyP::PROTO_HTTPS ||
510 userInfo().isEmpty();
511 if (!omitUserInfo) {
512 absolute_.append(userInfo());
513 absolute_.append("@", 1);
514 }
515 absolute_.append(authority());
516 }
517 absolute_.append(path());
518 }
519
520 return absolute_;
521 }
522
523 /** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
524 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
525 * and never copy the query-string part in the first place
526 */
527 char *
528 urlCanonicalClean(const HttpRequest * request)
529 {
530 LOCAL_ARRAY(char, buf, MAX_URL);
531
532 snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
533 buf[sizeof(buf)-1] = '\0';
534
535 // URN, CONNECT method, and non-stripped URIs can go straight out
536 if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
537 // strip anything AFTER a question-mark
538 // leaving the '?' in place
539 if (auto t = strchr(buf, '?')) {
540 *(++t) = '\0';
541 }
542 }
543
544 if (stringHasCntl(buf))
545 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
546
547 return buf;
548 }
549
550 /**
551 * Yet another alternative to urlCanonical.
552 * This one adds the https:// parts to Http::METHOD_CONNECT URL
553 * for use in error page outputs.
554 * Luckily we can leverage the others instead of duplicating.
555 */
556 const char *
557 urlCanonicalFakeHttps(const HttpRequest * request)
558 {
559 LOCAL_ARRAY(char, buf, MAX_URL);
560
561 // method CONNECT and port HTTPS
562 if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
563 snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
564 return buf;
565 }
566
567 // else do the normal complete canonical thing.
568 return urlCanonicalClean(request);
569 }
570
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'.
 *
 * A NULL or empty url is not relative. Otherwise the url is relative unless
 * a ':' comes before any '/'.
 */
bool
urlIsRelative(const char *url)
{
    if (!url || !*url)
        return false;

    // whichever of ':' and '/' comes first decides; this lands on the
    // terminating 0 when the url contains neither
    const char *firstDelimiter = url + strcspn(url, ":/");

    return *firstDelimiter != ':';
}
596
597 /*
598 * Convert a relative URL to an absolute URL using the context of a given
599 * request.
600 *
601 * It is assumed that you have already ensured that the URL is relative.
602 *
603 * If NULL is returned it is an indication that the method in use in the
604 * request does not distinguish between relative and absolute and you should
605 * use the url unchanged.
606 *
607 * If non-NULL is returned, it is up to the caller to free the resulting
608 * memory using safe_free().
609 */
610 char *
611 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
612 {
613
614 if (req->method.id() == Http::METHOD_CONNECT) {
615 return (NULL);
616 }
617
618 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
619
620 if (req->url.getScheme() == AnyP::PROTO_URN) {
621 // XXX: this is what the original code did, but it seems to break the
622 // intended behaviour of this function. It returns the stored URN path,
623 // not converting the given one into a URN...
624 snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
625 return (urlbuf);
626 }
627
628 SBuf authorityForm = req->url.authority(); // host[:port]
629 const SBuf &scheme = req->url.getScheme().image();
630 size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
631 SQUIDSBUFPRINT(scheme),
632 SQUIDSBUFPRINT(req->url.userInfo()),
633 !req->url.userInfo().isEmpty() ? "@" : "",
634 SQUIDSBUFPRINT(authorityForm));
635
636 // if the first char is '/' assume its a relative path
637 // XXX: this breaks on scheme-relative URLs,
638 // but we should not see those outside ESI, and rarely there.
639 // XXX: also breaks on any URL containing a '/' in the query-string portion
640 if (relUrl[0] == '/') {
641 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
642 } else {
643 SBuf path = req->url.path();
644 SBuf::size_type lastSlashPos = path.rfind('/');
645
646 if (lastSlashPos == SBuf::npos) {
647 // replace the whole path with the given bit(s)
648 urlbuf[urllen] = '/';
649 ++urllen;
650 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
651 } else {
652 // replace only the last (file?) segment with the given bit(s)
653 ++lastSlashPos;
654 if (lastSlashPos > MAX_URL - urllen - 1) {
655 // XXX: crops bits in the middle of the combined URL.
656 lastSlashPos = MAX_URL - urllen - 1;
657 }
658 SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
659 urllen += lastSlashPos;
660 if (urllen + 1 < MAX_URL) {
661 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
662 }
663 }
664 }
665
666 return (urlbuf);
667 }
668
/// Lowercases one character. The unsigned char cast keeps std::tolower()
/// well defined for char values that are negative.
static inline int
domainCharLower(const char c)
{
    return std::tolower(static_cast<unsigned char>(c));
}

/**
 * Compares host name h with domain d, case-insensitively, walking both
 * strings from their ends towards their beginnings. Leading dots of h are
 * ignored.
 *
 * \param honorWildcards  when true, an h of the form "*.foo.com" also
 *                        matches a d such as "x.foo.com"
 *
 * \retval 0   h matches d: the names are equal, or d starts with a '.' and
 *             h ends with d
 * \retval <0  no match; h sorts before d (also when h is empty)
 * \retval >0  no match; h sorts after d (also when d is empty)
 */
int
matchDomainName(const char *h, const char *d, bool honorWildcards)
{
    while ('.' == *h)
        ++h;

    int hl = static_cast<int>(strlen(h));

    // An empty name has no last character to start from; without these two
    // checks the loop below would read h[-1] or d[-1].
    if (hl == 0)
        return -1;

    int dl = static_cast<int>(strlen(d));

    if (dl == 0)
        return 1;

    /*
     * Start at the ends of the two strings and work towards the
     * beginning.
     */
    while (domainCharLower(h[--hl]) == domainCharLower(d[--dl])) {
        if (hl == 0 && dl == 0) {
            /*
             * We made it all the way to the beginning of both
             * strings without finding any difference.
             */
            return 0;
        }

        if (0 == hl) {
            /*
             * The host string is shorter than the domain string.
             * There is only one case when this can be a match.
             * If the domain is just one character longer, and if
             * that character is a leading '.' then we call it a
             * match.
             */

            if (1 == dl && '.' == d[0])
                return 0;
            else
                return -1;
        }

        if (0 == dl) {
            /*
             * The domain string is shorter than the host string.
             * This is a match only if the first domain character
             * is a leading '.'.
             */

            if ('.' == d[0])
                return 0;
            else
                return 1;
        }
    }

    /*
     * We found different characters in the same position (from the end).
     */

    // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
    // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
    // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
    if (honorWildcards && h[hl] == '*' && h[hl + 1] == '.')
        return 0;

    /*
     * If one of those character is '.' then it is special. In order
     * for splay tree sorting to work properly, "x-foo.com" must
     * be greater than ".foo.com" even though '-' is less than '.'.
     */
    if ('.' == d[dl])
        return 1;

    if ('.' == h[hl])
        return -1;

    return (domainCharLower(h[hl]) - domainCharLower(d[dl]));
}
747
748 /*
749 * return true if we can serve requests for this method.
750 */
751 int
752 urlCheckRequest(const HttpRequest * r)
753 {
754 int rc = 0;
755 /* protocol "independent" methods
756 *
757 * actually these methods are specific to HTTP:
758 * they are methods we recieve on our HTTP port,
759 * and if we had a FTP listener would not be relevant
760 * there.
761 *
762 * So, we should delegate them to HTTP. The problem is that we
763 * do not have a default protocol from the client side of HTTP.
764 */
765
766 if (r->method == Http::METHOD_CONNECT)
767 return 1;
768
769 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
770 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
771 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
772 return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
773
774 if (r->method == Http::METHOD_PURGE)
775 return 1;
776
777 /* does method match the protocol? */
778 switch (r->url.getScheme()) {
779
780 case AnyP::PROTO_URN:
781
782 case AnyP::PROTO_HTTP:
783
784 case AnyP::PROTO_CACHE_OBJECT:
785 rc = 1;
786 break;
787
788 case AnyP::PROTO_FTP:
789
790 if (r->method == Http::METHOD_PUT)
791 rc = 1;
792
793 case AnyP::PROTO_GOPHER:
794
795 case AnyP::PROTO_WAIS:
796
797 case AnyP::PROTO_WHOIS:
798 if (r->method == Http::METHOD_GET)
799 rc = 1;
800 else if (r->method == Http::METHOD_HEAD)
801 rc = 1;
802
803 break;
804
805 case AnyP::PROTO_HTTPS:
806 #if USE_OPENSSL
807 rc = 1;
808 #else
809 /*
810 * Squid can't originate an SSL connection, so it should
811 * never receive an "https:" URL. It should always be
812 * CONNECT instead.
813 */
814 rc = 0;
815 #endif
816 break;
817
818 default:
819 break;
820 }
821
822 return rc;
823 }
824
825 /*
826 * Quick-n-dirty host extraction from a URL. Steps:
827 * Look for a colon
828 * Skip any '/' after the colon
829 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
830 * Look for an ending '/' or ':' and terminate
831 * Look for login info preceeded by '@'
832 */
833
834 class URLHostName
835 {
836
837 public:
838 char * extract(char const *url);
839
840 private:
841 static char Host [SQUIDHOSTNAMELEN];
842 void init(char const *);
843 void findHostStart();
844 void trimTrailingChars();
845 void trimAuth();
846 char const *hostStart;
847 char const *url;
848 };
849
850 char *
851 urlHostname(const char *url)
852 {
853 return URLHostName().extract(url);
854 }
855
856 char URLHostName::Host[SQUIDHOSTNAMELEN];
857
858 void
859 URLHostName::init(char const *aUrl)
860 {
861 Host[0] = '\0';
862 url = aUrl;
863 }
864
865 void
866 URLHostName::findHostStart()
867 {
868 if (NULL == (hostStart = strchr(url, ':')))
869 return;
870
871 ++hostStart;
872
873 while (*hostStart != '\0' && *hostStart == '/')
874 ++hostStart;
875
876 if (*hostStart == ']')
877 ++hostStart;
878 }
879
880 void
881 URLHostName::trimTrailingChars()
882 {
883 char *t;
884
885 if ((t = strchr(Host, '/')))
886 *t = '\0';
887
888 if ((t = strrchr(Host, ':')))
889 *t = '\0';
890
891 if ((t = strchr(Host, ']')))
892 *t = '\0';
893 }
894
895 void
896 URLHostName::trimAuth()
897 {
898 char *t;
899
900 if ((t = strrchr(Host, '@'))) {
901 ++t;
902 memmove(Host, t, strlen(t) + 1);
903 }
904 }
905
906 char *
907 URLHostName::extract(char const *aUrl)
908 {
909 init(aUrl);
910 findHostStart();
911
912 if (hostStart == NULL)
913 return NULL;
914
915 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
916
917 trimTrailingChars();
918
919 trimAuth();
920
921 return Host;
922 }
923
924 URL::URL(AnyP::UriScheme const &aScheme) :
925 scheme_(aScheme),
926 hostIsNumeric_(false),
927 port_(0)
928 {
929 *host_=0;
930 }
931