]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / url.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 23 URL Parsing */
10
11 #include "squid.h"
12 #include "globals.h"
13 #include "HttpRequest.h"
14 #include "rfc1738.h"
15 #include "SquidConfig.h"
16 #include "SquidString.h"
17 #include "URL.h"
18
19 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
20 const AnyP::ProtocolType protocol,
21 const char *const urlpath,
22 const char *const host,
23 const char *const login,
24 const int port,
25 HttpRequest *request);
26 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
27 static const char valid_hostname_chars_u[] =
28 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
29 "abcdefghijklmnopqrstuvwxyz"
30 "0123456789-._"
31 "[:]"
32 ;
33 static const char valid_hostname_chars[] =
34 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
35 "abcdefghijklmnopqrstuvwxyz"
36 "0123456789-."
37 "[:]"
38 ;
39
40 void
41 urlInitialize(void)
42 {
43 debugs(23, 5, "urlInitialize: Initializing...");
44 /* this ensures that the number of protocol strings is the same as
45 * the enum slots allocated because the last enum is always 'MAX'.
46 */
47 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
48 /*
49 * These test that our matchDomainName() function works the
50 * way we expect it to.
51 */
52 assert(0 == matchDomainName("foo.com", "foo.com"));
53 assert(0 == matchDomainName(".foo.com", "foo.com"));
54 assert(0 == matchDomainName("foo.com", ".foo.com"));
55 assert(0 == matchDomainName(".foo.com", ".foo.com"));
56 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
57 assert(0 != matchDomainName("x.foo.com", "foo.com"));
58 assert(0 != matchDomainName("foo.com", "x.foo.com"));
59 assert(0 != matchDomainName("bar.com", "foo.com"));
60 assert(0 != matchDomainName(".bar.com", "foo.com"));
61 assert(0 != matchDomainName(".bar.com", ".foo.com"));
62 assert(0 != matchDomainName("bar.com", ".foo.com"));
63 assert(0 < matchDomainName("zzz.com", "foo.com"));
64 assert(0 > matchDomainName("aaa.com", "foo.com"));
65 assert(0 == matchDomainName("FOO.com", "foo.COM"));
66 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
67 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
68 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
69 /* more cases? */
70 }
71
72 /**
73 * urlParseProtocol() takes begin (b) and end (e) pointers, but for
74 * backwards compatibility, e defaults to NULL, in which case we
75 * assume b is NULL-terminated.
76 */
77 AnyP::ProtocolType
78 urlParseProtocol(const char *b, const char *e)
79 {
80 /*
81 * if e is NULL, b must be NULL terminated and we
82 * make e point to the first whitespace character
83 * after b.
84 */
85
86 if (NULL == e)
87 e = b + strcspn(b, ":");
88
89 int len = e - b;
90
91 /* test common stuff first */
92
93 if (strncasecmp(b, "http", len) == 0)
94 return AnyP::PROTO_HTTP;
95
96 if (strncasecmp(b, "ftp", len) == 0)
97 return AnyP::PROTO_FTP;
98
99 if (strncasecmp(b, "https", len) == 0)
100 return AnyP::PROTO_HTTPS;
101
102 if (strncasecmp(b, "file", len) == 0)
103 return AnyP::PROTO_FTP;
104
105 if (strncasecmp(b, "coap", len) == 0)
106 return AnyP::PROTO_COAP;
107
108 if (strncasecmp(b, "coaps", len) == 0)
109 return AnyP::PROTO_COAPS;
110
111 if (strncasecmp(b, "gopher", len) == 0)
112 return AnyP::PROTO_GOPHER;
113
114 if (strncasecmp(b, "wais", len) == 0)
115 return AnyP::PROTO_WAIS;
116
117 if (strncasecmp(b, "cache_object", len) == 0)
118 return AnyP::PROTO_CACHE_OBJECT;
119
120 if (strncasecmp(b, "urn", len) == 0)
121 return AnyP::PROTO_URN;
122
123 if (strncasecmp(b, "whois", len) == 0)
124 return AnyP::PROTO_WHOIS;
125
126 return AnyP::PROTO_NONE;
127 }
128
129 int
130 urlDefaultPort(AnyP::ProtocolType p)
131 {
132 switch (p) {
133
134 case AnyP::PROTO_HTTP:
135 return 80;
136
137 case AnyP::PROTO_HTTPS:
138 return 443;
139
140 case AnyP::PROTO_FTP:
141 return 21;
142
143 case AnyP::PROTO_COAP:
144 case AnyP::PROTO_COAPS:
145 // coaps:// default is TBA as of draft-ietf-core-coap-08.
146 // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
147 return 5683;
148
149 case AnyP::PROTO_GOPHER:
150 return 70;
151
152 case AnyP::PROTO_WAIS:
153 return 210;
154
155 case AnyP::PROTO_CACHE_OBJECT:
156 return CACHE_HTTP_PORT;
157
158 case AnyP::PROTO_WHOIS:
159 return 43;
160
161 default:
162 return 0;
163 }
164 }
165
166 /*
167 * Parse a URI/URL.
168 *
169 * If the 'request' arg is non-NULL, put parsed values there instead
170 * of allocating a new HttpRequest.
171 *
172 * This abuses HttpRequest as a way of representing the parsed url
173 * and its components.
174 * method is used to switch parsers and to init the HttpRequest.
175 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
176 * looked for.
177 * The url is non const so that if its too long we can NULL-terminate it in place.
178 */
179
180 /*
181 * This routine parses a URL. Its assumed that the URL is complete -
182 * ie, the end of the string is the end of the URL. Don't pass a partial
183 * URL here as this routine doesn't have any way of knowing whether
184 * its partial or not (ie, it handles the case of no trailing slash as
185 * being "end of host with implied path of /".
186 */
187 HttpRequest *
188 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
189 {
190 LOCAL_ARRAY(char, proto, MAX_URL);
191 LOCAL_ARRAY(char, login, MAX_URL);
192 LOCAL_ARRAY(char, host, MAX_URL);
193 LOCAL_ARRAY(char, urlpath, MAX_URL);
194 char *t = NULL;
195 char *q = NULL;
196 int port;
197 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
198 int l;
199 int i;
200 const char *src;
201 char *dst;
202 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
203
204 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
205 /* terminate so it doesn't overflow other buffers */
206 *(url + (MAX_URL >> 1)) = '\0';
207 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
208 return NULL;
209 }
210 if (method == Http::METHOD_CONNECT) {
211 port = CONNECT_PORT;
212
213 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
214 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
215 return NULL;
216
217 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
218 strcmp(url, "*") == 0) {
219 protocol = AnyP::PROTO_HTTP;
220 port = urlDefaultPort(protocol);
221 return urlParseFinish(method, protocol, url, host, login, port, request);
222 } else if (!strncmp(url, "urn:", 4)) {
223 return urnParse(method, url, request);
224 } else {
225 /* Parse the URL: */
226 src = url;
227 i = 0;
228 /* Find first : - everything before is protocol */
229 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
230 *dst = *src;
231 }
232 if (i >= l)
233 return NULL;
234 *dst = '\0';
235
236 /* Then its :// */
237 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
238 return NULL;
239 i += 3;
240 src += 3;
241
242 /* Then everything until first /; thats host (and port; which we'll look for here later) */
243 // bug 1881: If we don't get a "/" then we imply it was there
244 // bug 3074: We could just be given a "?" or "#". These also imply "/"
245 // bug 3233: whitespace is also a hostname delimiter.
246 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
247 *dst = *src;
248 }
249
250 /*
251 * We can't check for "i >= l" here because we could be at the end of the line
252 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
253 * been -given- a valid URL and the path is just '/'.
254 */
255 if (i > l)
256 return NULL;
257 *dst = '\0';
258
259 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
260 if (*src == '?' || *src == '#' || *src == '\0') {
261 urlpath[0] = '/';
262 dst = &urlpath[1];
263 } else {
264 dst = urlpath;
265 }
266 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
267 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
268 *dst = *src;
269 }
270
271 /* We -could- be at the end of the buffer here */
272 if (i > l)
273 return NULL;
274 /* If the URL path is empty we set it to be "/" */
275 if (dst == urlpath) {
276 *dst = '/';
277 ++dst;
278 }
279 *dst = '\0';
280
281 protocol = urlParseProtocol(proto);
282 port = urlDefaultPort(protocol);
283
284 /* Is there any login information? (we should eventually parse it above) */
285 t = strrchr(host, '@');
286 if (t != NULL) {
287 strncpy((char *) login, (char *) host, sizeof(login)-1);
288 login[sizeof(login)-1] = '\0';
289 t = strrchr(login, '@');
290 *t = 0;
291 strncpy((char *) host, t + 1, sizeof(host)-1);
292 host[sizeof(host)-1] = '\0';
293 }
294
295 /* Is there any host information? (we should eventually parse it above) */
296 if (*host == '[') {
297 /* strip any IPA brackets. valid under IPv6. */
298 dst = host;
299 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
300 src = host;
301 ++src;
302 l = strlen(host);
303 i = 1;
304 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
305 *dst = *src;
306 }
307
308 /* we moved in-place, so truncate the actual hostname found */
309 *dst = '\0';
310 ++dst;
311
312 /* skip ahead to either start of port, or original EOS */
313 while (*dst != '\0' && *dst != ':')
314 ++dst;
315 t = dst;
316 } else {
317 t = strrchr(host, ':');
318
319 if (t != strchr(host,':') ) {
320 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
321 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
322 /* therefore we MUST accept the case where they are not bracketed at all. */
323 t = NULL;
324 }
325 }
326
327 // Bug 3183 sanity check: If scheme is present, host must be too.
328 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
329 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
330 return NULL;
331 }
332
333 if (t && *t == ':') {
334 *t = '\0';
335 ++t;
336 port = atoi(t);
337 }
338 }
339
340 for (t = host; *t; ++t)
341 *t = xtolower(*t);
342
343 if (stringHasWhitespace(host)) {
344 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
345 t = q = host;
346 while (*t) {
347 if (!xisspace(*t)) {
348 *q = *t;
349 ++q;
350 }
351 ++t;
352 }
353 *q = '\0';
354 }
355 }
356
357 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
358
359 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
360 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
361 return NULL;
362 }
363
364 /* For IPV6 addresses also check for a colon */
365 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
366 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
367
368 /* remove trailing dots from hostnames */
369 while ((l = strlen(host)) > 0 && host[--l] == '.')
370 host[l] = '\0';
371
372 /* reject duplicate or leading dots */
373 if (strstr(host, "..") || *host == '.') {
374 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
375 return NULL;
376 }
377
378 if (port < 1 || port > 65535) {
379 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
380 return NULL;
381 }
382
383 #if HARDCODE_DENY_PORTS
384 /* These ports are filtered in the default squid.conf, but
385 * maybe someone wants them hardcoded... */
386 if (port == 7 || port == 9 || port == 19) {
387 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
388 return NULL;
389 }
390 #endif
391
392 if (stringHasWhitespace(urlpath)) {
393 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
394
395 switch (Config.uri_whitespace) {
396
397 case URI_WHITESPACE_DENY:
398 return NULL;
399
400 case URI_WHITESPACE_ALLOW:
401 break;
402
403 case URI_WHITESPACE_ENCODE:
404 t = rfc1738_escape_unescaped(urlpath);
405 xstrncpy(urlpath, t, MAX_URL);
406 break;
407
408 case URI_WHITESPACE_CHOP:
409 *(urlpath + strcspn(urlpath, w_space)) = '\0';
410 break;
411
412 case URI_WHITESPACE_STRIP:
413 default:
414 t = q = urlpath;
415 while (*t) {
416 if (!xisspace(*t)) {
417 *q = *t;
418 ++q;
419 }
420 ++t;
421 }
422 *q = '\0';
423 }
424 }
425
426 return urlParseFinish(method, protocol, urlpath, host, login, port, request);
427 }
428
429 /**
430 * Update request with parsed URI data. If the request arg is
431 * non-NULL, put parsed values there instead of allocating a new
432 * HttpRequest.
433 */
434 static HttpRequest *
435 urlParseFinish(const HttpRequestMethod& method,
436 const AnyP::ProtocolType protocol,
437 const char *const urlpath,
438 const char *const host,
439 const char *const login,
440 const int port,
441 HttpRequest *request)
442 {
443 if (NULL == request)
444 request = new HttpRequest(method, protocol, urlpath);
445 else {
446 request->initHTTP(method, protocol, urlpath);
447 safe_free(request->canonical);
448 }
449
450 request->SetHost(host);
451 xstrncpy(request->login, login, MAX_LOGIN_SZ);
452 request->port = (unsigned short) port;
453 return request;
454 }
455
456 static HttpRequest *
457 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
458 {
459 debugs(50, 5, "urnParse: " << urn);
460 if (request) {
461 request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
462 safe_free(request->canonical);
463 return request;
464 }
465
466 return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
467 }
468
469 const char *
470 urlCanonical(HttpRequest * request)
471 {
472 LOCAL_ARRAY(char, portbuf, 32);
473 LOCAL_ARRAY(char, urlbuf, MAX_URL);
474
475 if (request->canonical)
476 return request->canonical;
477
478 if (request->url.getScheme() == AnyP::PROTO_URN) {
479 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
480 SQUIDSTRINGPRINT(request->urlpath));
481 } else {
482 switch (request->method.id()) {
483
484 case Http::METHOD_CONNECT:
485 snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
486 break;
487
488 default: {
489 portbuf[0] = '\0';
490
491 if (request->port != urlDefaultPort(request->url.getScheme()))
492 snprintf(portbuf, 32, ":%d", request->port);
493
494 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
495 request->url.getScheme().c_str(),
496 request->login,
497 *request->login ? "@" : null_string,
498 request->GetHost(),
499 portbuf,
500 SQUIDSTRINGPRINT(request->urlpath));
501 }
502 }
503 }
504
505 return (request->canonical = xstrdup(urlbuf));
506 }
507
508 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
509 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
510 * and never copy the query-string part in the first place
511 */
512 char *
513 urlCanonicalClean(const HttpRequest * request)
514 {
515 LOCAL_ARRAY(char, buf, MAX_URL);
516 LOCAL_ARRAY(char, portbuf, 32);
517 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
518 char *t;
519
520 if (request->url.getScheme() == AnyP::PROTO_URN) {
521 snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
522 SQUIDSTRINGPRINT(request->urlpath));
523 } else {
524 switch (request->method.id()) {
525
526 case Http::METHOD_CONNECT:
527 snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
528 break;
529
530 default: {
531 portbuf[0] = '\0';
532
533 if (request->port != urlDefaultPort(request->url.getScheme()))
534 snprintf(portbuf, 32, ":%d", request->port);
535
536 loginbuf[0] = '\0';
537
538 if ((int) strlen(request->login) > 0) {
539 strcpy(loginbuf, request->login);
540
541 if ((t = strchr(loginbuf, ':')))
542 *t = '\0';
543
544 strcat(loginbuf, "@");
545 }
546
547 snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
548 request->url.getScheme().c_str(),
549 loginbuf,
550 request->GetHost(),
551 portbuf,
552 SQUIDSTRINGPRINT(request->urlpath));
553
554 // strip arguments AFTER a question-mark
555 if (Config.onoff.strip_query_terms)
556 if ((t = strchr(buf, '?')))
557 *(++t) = '\0';
558 }
559 }
560 }
561
562 if (stringHasCntl(buf))
563 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
564
565 return buf;
566 }
567
568 /**
569 * Yet another alternative to urlCanonical.
570 * This one adds the https:// parts to Http::METHOD_CONNECT URL
571 * for use in error page outputs.
572 * Luckily we can leverage the others instead of duplicating.
573 */
574 const char *
575 urlCanonicalFakeHttps(const HttpRequest * request)
576 {
577 LOCAL_ARRAY(char, buf, MAX_URL);
578
579 // method CONNECT and port HTTPS
580 if (request->method == Http::METHOD_CONNECT && request->port == 443) {
581 snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
582 return buf;
583 }
584
585 // else do the normal complete canonical thing.
586 return urlCanonicalClean(request);
587 }
588
/*
 * Test if a URL is relative.
 *
 * A URL is treated as absolute when a ':' (the scheme delimiter) shows
 * up in its first path segment, i.e. before any '/', '?' or '#'. A colon
 * inside the query or fragment ("page?time=12:30") does not introduce a
 * scheme and therefore does not make the URL absolute.
 *
 * Returns false for a NULL or empty url.
 */
bool
urlIsRelative(const char *url)
{
    if (url == NULL || *url == '\0')
        return false;

    /* only the first segment can carry a scheme; stop at whatever ends it */
    for (const char *p = url; *p != '\0' && *p != '/' && *p != '?' && *p != '#'; ++p) {
        if (*p == ':')
            return false;
    }

    return true;
}
614
615 /*
616 * Convert a relative URL to an absolute URL using the context of a given
617 * request.
618 *
619 * It is assumed that you have already ensured that the URL is relative.
620 *
621 * If NULL is returned it is an indication that the method in use in the
622 * request does not distinguish between relative and absolute and you should
623 * use the url unchanged.
624 *
625 * If non-NULL is returned, it is up to the caller to free the resulting
626 * memory using safe_free().
627 */
628 char *
629 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
630 {
631
632 if (req->method.id() == Http::METHOD_CONNECT) {
633 return (NULL);
634 }
635
636 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
637
638 if (req->url.getScheme() == AnyP::PROTO_URN) {
639 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
640 SQUIDSTRINGPRINT(req->urlpath));
641 return (urlbuf);
642 }
643
644 size_t urllen;
645
646 if (req->port != urlDefaultPort(req->url.getScheme())) {
647 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
648 req->url.getScheme().c_str(),
649 req->login,
650 *req->login ? "@" : null_string,
651 req->GetHost(),
652 req->port
653 );
654 } else {
655 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
656 req->url.getScheme().c_str(),
657 req->login,
658 *req->login ? "@" : null_string,
659 req->GetHost()
660 );
661 }
662
663 if (relUrl[0] == '/') {
664 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
665 } else {
666 const char *path = req->urlpath.termedBuf();
667 const char *last_slash = strrchr(path, '/');
668
669 if (last_slash == NULL) {
670 urlbuf[urllen] = '/';
671 ++urllen;
672 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
673 } else {
674 ++last_slash;
675 size_t pathlen = last_slash - path;
676 if (pathlen > MAX_URL - urllen - 1) {
677 pathlen = MAX_URL - urllen - 1;
678 }
679 strncpy(&urlbuf[urllen], path, pathlen);
680 urllen += pathlen;
681 if (urllen + 1 < MAX_URL) {
682 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
683 }
684 }
685 }
686
687 return (urlbuf);
688 }
689
690 /*
691 * matchDomainName() compares a hostname with a domainname according
692 * to the following rules:
693 *
694 * HOST DOMAIN MATCH?
695 * ------------- ------------- ------
696 * foo.com foo.com YES
697 * .foo.com foo.com YES
698 * x.foo.com foo.com NO
699 * foo.com .foo.com YES
700 * .foo.com .foo.com YES
701 * x.foo.com .foo.com YES
702 *
703 * We strip leading dots on hosts (but not domains!) so that
704 * ".foo.com" is is always the same as "foo.com".
705 *
706 * Return values:
707 * 0 means the host matches the domain
708 * 1 means the host is greater than the domain
709 * -1 means the host is less than the domain
710 */
711
712 int
713 matchDomainName(const char *h, const char *d)
714 {
715 int dl;
716 int hl;
717
718 while ('.' == *h)
719 ++h;
720
721 hl = strlen(h);
722
723 dl = strlen(d);
724
725 /*
726 * Start at the ends of the two strings and work towards the
727 * beginning.
728 */
729 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
730 if (hl == 0 && dl == 0) {
731 /*
732 * We made it all the way to the beginning of both
733 * strings without finding any difference.
734 */
735 return 0;
736 }
737
738 if (0 == hl) {
739 /*
740 * The host string is shorter than the domain string.
741 * There is only one case when this can be a match.
742 * If the domain is just one character longer, and if
743 * that character is a leading '.' then we call it a
744 * match.
745 */
746
747 if (1 == dl && '.' == d[0])
748 return 0;
749 else
750 return -1;
751 }
752
753 if (0 == dl) {
754 /*
755 * The domain string is shorter than the host string.
756 * This is a match only if the first domain character
757 * is a leading '.'.
758 */
759
760 if ('.' == d[0])
761 return 0;
762 else
763 return 1;
764 }
765 }
766
767 /*
768 * We found different characters in the same position (from the end).
769 */
770 /*
771 * If one of those character is '.' then its special. In order
772 * for splay tree sorting to work properly, "x-foo.com" must
773 * be greater than ".foo.com" even though '-' is less than '.'.
774 */
775 if ('.' == d[dl])
776 return 1;
777
778 if ('.' == h[hl])
779 return -1;
780
781 return (xtolower(h[hl]) - xtolower(d[dl]));
782 }
783
784 /*
785 * return true if we can serve requests for this method.
786 */
787 int
788 urlCheckRequest(const HttpRequest * r)
789 {
790 int rc = 0;
791 /* protocol "independent" methods
792 *
793 * actually these methods are specific to HTTP:
794 * they are methods we recieve on our HTTP port,
795 * and if we had a FTP listener would not be relevant
796 * there.
797 *
798 * So, we should delegate them to HTTP. The problem is that we
799 * do not have a default protocol from the client side of HTTP.
800 */
801
802 if (r->method == Http::METHOD_CONNECT)
803 return 1;
804
805 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
806 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
807 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
808 return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->urlpath != "*");
809
810 if (r->method == Http::METHOD_PURGE)
811 return 1;
812
813 /* does method match the protocol? */
814 switch (r->url.getScheme()) {
815
816 case AnyP::PROTO_URN:
817
818 case AnyP::PROTO_HTTP:
819
820 case AnyP::PROTO_CACHE_OBJECT:
821 rc = 1;
822 break;
823
824 case AnyP::PROTO_FTP:
825
826 if (r->method == Http::METHOD_PUT)
827 rc = 1;
828
829 case AnyP::PROTO_GOPHER:
830
831 case AnyP::PROTO_WAIS:
832
833 case AnyP::PROTO_WHOIS:
834 if (r->method == Http::METHOD_GET)
835 rc = 1;
836 else if (r->method == Http::METHOD_HEAD)
837 rc = 1;
838
839 break;
840
841 case AnyP::PROTO_HTTPS:
842 #if USE_OPENSSL
843
844 rc = 1;
845
846 break;
847
848 #else
849 /*
850 * Squid can't originate an SSL connection, so it should
851 * never receive an "https:" URL. It should always be
852 * CONNECT instead.
853 */
854 rc = 0;
855
856 #endif
857
858 default:
859 break;
860 }
861
862 return rc;
863 }
864
865 /*
866 * Quick-n-dirty host extraction from a URL. Steps:
867 * Look for a colon
868 * Skip any '/' after the colon
869 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
870 * Look for an ending '/' or ':' and terminate
871 * Look for login info preceeded by '@'
872 */
873
874 class URLHostName
875 {
876
877 public:
878 char * extract(char const *url);
879
880 private:
881 static char Host [SQUIDHOSTNAMELEN];
882 void init(char const *);
883 void findHostStart();
884 void trimTrailingChars();
885 void trimAuth();
886 char const *hostStart;
887 char const *url;
888 };
889
890 char *
891 urlHostname(const char *url)
892 {
893 return URLHostName().extract(url);
894 }
895
896 char URLHostName::Host[SQUIDHOSTNAMELEN];
897
898 void
899 URLHostName::init(char const *aUrl)
900 {
901 Host[0] = '\0';
902 url = aUrl;
903 }
904
905 void
906 URLHostName::findHostStart()
907 {
908 if (NULL == (hostStart = strchr(url, ':')))
909 return;
910
911 ++hostStart;
912
913 while (*hostStart != '\0' && *hostStart == '/')
914 ++hostStart;
915
916 if (*hostStart == ']')
917 ++hostStart;
918 }
919
920 void
921 URLHostName::trimTrailingChars()
922 {
923 char *t;
924
925 if ((t = strchr(Host, '/')))
926 *t = '\0';
927
928 if ((t = strrchr(Host, ':')))
929 *t = '\0';
930
931 if ((t = strchr(Host, ']')))
932 *t = '\0';
933 }
934
935 void
936 URLHostName::trimAuth()
937 {
938 char *t;
939
940 if ((t = strrchr(Host, '@'))) {
941 ++t;
942 memmove(Host, t, strlen(t) + 1);
943 }
944 }
945
946 char *
947 URLHostName::extract(char const *aUrl)
948 {
949 init(aUrl);
950 findHostStart();
951
952 if (hostStart == NULL)
953 return NULL;
954
955 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
956
957 trimTrailingChars();
958
959 trimAuth();
960
961 return Host;
962 }