]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / url.cc
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 23 URL Parsing */
10
11 #include "squid.h"
12 #include "globals.h"
13 #include "HttpRequest.h"
14 #include "rfc1738.h"
15 #include "SquidConfig.h"
16 #include "SquidString.h"
17 #include "URL.h"
18
19 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
20 const AnyP::ProtocolType protocol,
21 const char *const urlpath,
22 const char *const host,
23 const SBuf &login,
24 const int port,
25 HttpRequest *request);
26 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/*
 * Character whitelists applied to the host by urlParse() when
 * Config.onoff.check_hostnames is set. The "_u" table is selected when
 * Config.onoff.allow_underscore is set; the only difference is '_'.
 */
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* upper-case letters */
    "abcdefghijklmnopqrstuvwxyz"    /* lower-case letters */
    "0123456789"                    /* digits */
    "-._"                           /* hyphen, dot, underscore */
    "[:]";                          /* bracket and colon characters */

static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* upper-case letters */
    "abcdefghijklmnopqrstuvwxyz"    /* lower-case letters */
    "0123456789"                    /* digits */
    "-."                            /* hyphen, dot */
    "[:]";                          /* bracket and colon characters */
39
40 const SBuf &
41 URL::Asterisk()
42 {
43 static SBuf star("*");
44 return star;
45 }
46
47 void
48 urlInitialize(void)
49 {
50 debugs(23, 5, "urlInitialize: Initializing...");
51 /* this ensures that the number of protocol strings is the same as
52 * the enum slots allocated because the last enum is always 'MAX'.
53 */
54 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
55 /*
56 * These test that our matchDomainName() function works the
57 * way we expect it to.
58 */
59 assert(0 == matchDomainName("foo.com", "foo.com"));
60 assert(0 == matchDomainName(".foo.com", "foo.com"));
61 assert(0 == matchDomainName("foo.com", ".foo.com"));
62 assert(0 == matchDomainName(".foo.com", ".foo.com"));
63 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
64 assert(0 != matchDomainName("x.foo.com", "foo.com"));
65 assert(0 != matchDomainName("foo.com", "x.foo.com"));
66 assert(0 != matchDomainName("bar.com", "foo.com"));
67 assert(0 != matchDomainName(".bar.com", "foo.com"));
68 assert(0 != matchDomainName(".bar.com", ".foo.com"));
69 assert(0 != matchDomainName("bar.com", ".foo.com"));
70 assert(0 < matchDomainName("zzz.com", "foo.com"));
71 assert(0 > matchDomainName("aaa.com", "foo.com"));
72 assert(0 == matchDomainName("FOO.com", "foo.COM"));
73 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
74 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
75 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
76 /* more cases? */
77 }
78
79 /**
80 * urlParseProtocol() takes begin (b) and end (e) pointers, but for
81 * backwards compatibility, e defaults to NULL, in which case we
82 * assume b is NULL-terminated.
83 */
84 AnyP::ProtocolType
85 urlParseProtocol(const char *b, const char *e)
86 {
87 /*
88 * if e is NULL, b must be NULL terminated and we
89 * make e point to the first whitespace character
90 * after b.
91 */
92
93 if (NULL == e)
94 e = b + strcspn(b, ":");
95
96 int len = e - b;
97
98 /* test common stuff first */
99
100 if (strncasecmp(b, "http", len) == 0)
101 return AnyP::PROTO_HTTP;
102
103 if (strncasecmp(b, "ftp", len) == 0)
104 return AnyP::PROTO_FTP;
105
106 if (strncasecmp(b, "https", len) == 0)
107 return AnyP::PROTO_HTTPS;
108
109 if (strncasecmp(b, "file", len) == 0)
110 return AnyP::PROTO_FTP;
111
112 if (strncasecmp(b, "coap", len) == 0)
113 return AnyP::PROTO_COAP;
114
115 if (strncasecmp(b, "coaps", len) == 0)
116 return AnyP::PROTO_COAPS;
117
118 if (strncasecmp(b, "gopher", len) == 0)
119 return AnyP::PROTO_GOPHER;
120
121 if (strncasecmp(b, "wais", len) == 0)
122 return AnyP::PROTO_WAIS;
123
124 if (strncasecmp(b, "cache_object", len) == 0)
125 return AnyP::PROTO_CACHE_OBJECT;
126
127 if (strncasecmp(b, "urn", len) == 0)
128 return AnyP::PROTO_URN;
129
130 if (strncasecmp(b, "whois", len) == 0)
131 return AnyP::PROTO_WHOIS;
132
133 return AnyP::PROTO_NONE;
134 }
135
136 int
137 urlDefaultPort(AnyP::ProtocolType p)
138 {
139 switch (p) {
140
141 case AnyP::PROTO_HTTP:
142 return 80;
143
144 case AnyP::PROTO_HTTPS:
145 return 443;
146
147 case AnyP::PROTO_FTP:
148 return 21;
149
150 case AnyP::PROTO_COAP:
151 case AnyP::PROTO_COAPS:
152 // coaps:// default is TBA as of draft-ietf-core-coap-08.
153 // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
154 return 5683;
155
156 case AnyP::PROTO_GOPHER:
157 return 70;
158
159 case AnyP::PROTO_WAIS:
160 return 210;
161
162 case AnyP::PROTO_CACHE_OBJECT:
163 return CACHE_HTTP_PORT;
164
165 case AnyP::PROTO_WHOIS:
166 return 43;
167
168 default:
169 return 0;
170 }
171 }
172
173 /*
174 * Parse a URI/URL.
175 *
176 * If the 'request' arg is non-NULL, put parsed values there instead
177 * of allocating a new HttpRequest.
178 *
179 * This abuses HttpRequest as a way of representing the parsed url
180 * and its components.
181 * method is used to switch parsers and to init the HttpRequest.
182 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
183 * looked for.
184 * The url is non const so that if its too long we can NULL-terminate it in place.
185 */
186
187 /*
188 * This routine parses a URL. Its assumed that the URL is complete -
189 * ie, the end of the string is the end of the URL. Don't pass a partial
190 * URL here as this routine doesn't have any way of knowing whether
191 * its partial or not (ie, it handles the case of no trailing slash as
192 * being "end of host with implied path of /".
193 */
194 HttpRequest *
195 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
196 {
197 LOCAL_ARRAY(char, proto, MAX_URL);
198 LOCAL_ARRAY(char, login, MAX_URL);
199 LOCAL_ARRAY(char, host, MAX_URL);
200 LOCAL_ARRAY(char, urlpath, MAX_URL);
201 char *t = NULL;
202 char *q = NULL;
203 int port;
204 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
205 int l;
206 int i;
207 const char *src;
208 char *dst;
209 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
210
211 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
212 /* terminate so it doesn't overflow other buffers */
213 *(url + (MAX_URL >> 1)) = '\0';
214 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
215 return NULL;
216 }
217 if (method == Http::METHOD_CONNECT) {
218 port = CONNECT_PORT;
219
220 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
221 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
222 return NULL;
223
224 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
225 URL::Asterisk().cmp(url) == 0) {
226 protocol = AnyP::PROTO_HTTP;
227 port = urlDefaultPort(protocol);
228 return urlParseFinish(method, protocol, url, host, SBuf(), port, request);
229 } else if (!strncmp(url, "urn:", 4)) {
230 return urnParse(method, url, request);
231 } else {
232 /* Parse the URL: */
233 src = url;
234 i = 0;
235 /* Find first : - everything before is protocol */
236 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
237 *dst = *src;
238 }
239 if (i >= l)
240 return NULL;
241 *dst = '\0';
242
243 /* Then its :// */
244 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
245 return NULL;
246 i += 3;
247 src += 3;
248
249 /* Then everything until first /; thats host (and port; which we'll look for here later) */
250 // bug 1881: If we don't get a "/" then we imply it was there
251 // bug 3074: We could just be given a "?" or "#". These also imply "/"
252 // bug 3233: whitespace is also a hostname delimiter.
253 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
254 *dst = *src;
255 }
256
257 /*
258 * We can't check for "i >= l" here because we could be at the end of the line
259 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
260 * been -given- a valid URL and the path is just '/'.
261 */
262 if (i > l)
263 return NULL;
264 *dst = '\0';
265
266 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
267 if (*src == '?' || *src == '#' || *src == '\0') {
268 urlpath[0] = '/';
269 dst = &urlpath[1];
270 } else {
271 dst = urlpath;
272 }
273 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
274 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
275 *dst = *src;
276 }
277
278 /* We -could- be at the end of the buffer here */
279 if (i > l)
280 return NULL;
281 /* If the URL path is empty we set it to be "/" */
282 if (dst == urlpath) {
283 *dst = '/';
284 ++dst;
285 }
286 *dst = '\0';
287
288 protocol = urlParseProtocol(proto);
289 port = urlDefaultPort(protocol);
290
291 /* Is there any login information? (we should eventually parse it above) */
292 t = strrchr(host, '@');
293 if (t != NULL) {
294 strncpy((char *) login, (char *) host, sizeof(login)-1);
295 login[sizeof(login)-1] = '\0';
296 t = strrchr(login, '@');
297 *t = 0;
298 strncpy((char *) host, t + 1, sizeof(host)-1);
299 host[sizeof(host)-1] = '\0';
300 }
301
302 /* Is there any host information? (we should eventually parse it above) */
303 if (*host == '[') {
304 /* strip any IPA brackets. valid under IPv6. */
305 dst = host;
306 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
307 src = host;
308 ++src;
309 l = strlen(host);
310 i = 1;
311 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
312 *dst = *src;
313 }
314
315 /* we moved in-place, so truncate the actual hostname found */
316 *dst = '\0';
317 ++dst;
318
319 /* skip ahead to either start of port, or original EOS */
320 while (*dst != '\0' && *dst != ':')
321 ++dst;
322 t = dst;
323 } else {
324 t = strrchr(host, ':');
325
326 if (t != strchr(host,':') ) {
327 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
328 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
329 /* therefore we MUST accept the case where they are not bracketed at all. */
330 t = NULL;
331 }
332 }
333
334 // Bug 3183 sanity check: If scheme is present, host must be too.
335 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
336 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
337 return NULL;
338 }
339
340 if (t && *t == ':') {
341 *t = '\0';
342 ++t;
343 port = atoi(t);
344 }
345 }
346
347 for (t = host; *t; ++t)
348 *t = xtolower(*t);
349
350 if (stringHasWhitespace(host)) {
351 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
352 t = q = host;
353 while (*t) {
354 if (!xisspace(*t)) {
355 *q = *t;
356 ++q;
357 }
358 ++t;
359 }
360 *q = '\0';
361 }
362 }
363
364 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
365
366 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
367 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
368 return NULL;
369 }
370
371 /* For IPV6 addresses also check for a colon */
372 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
373 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
374
375 /* remove trailing dots from hostnames */
376 while ((l = strlen(host)) > 0 && host[--l] == '.')
377 host[l] = '\0';
378
379 /* reject duplicate or leading dots */
380 if (strstr(host, "..") || *host == '.') {
381 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
382 return NULL;
383 }
384
385 if (port < 1 || port > 65535) {
386 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
387 return NULL;
388 }
389
390 #if HARDCODE_DENY_PORTS
391 /* These ports are filtered in the default squid.conf, but
392 * maybe someone wants them hardcoded... */
393 if (port == 7 || port == 9 || port == 19) {
394 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
395 return NULL;
396 }
397 #endif
398
399 if (stringHasWhitespace(urlpath)) {
400 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
401
402 switch (Config.uri_whitespace) {
403
404 case URI_WHITESPACE_DENY:
405 return NULL;
406
407 case URI_WHITESPACE_ALLOW:
408 break;
409
410 case URI_WHITESPACE_ENCODE:
411 t = rfc1738_escape_unescaped(urlpath);
412 xstrncpy(urlpath, t, MAX_URL);
413 break;
414
415 case URI_WHITESPACE_CHOP:
416 *(urlpath + strcspn(urlpath, w_space)) = '\0';
417 break;
418
419 case URI_WHITESPACE_STRIP:
420 default:
421 t = q = urlpath;
422 while (*t) {
423 if (!xisspace(*t)) {
424 *q = *t;
425 ++q;
426 }
427 ++t;
428 }
429 *q = '\0';
430 }
431 }
432
433 return urlParseFinish(method, protocol, urlpath, host, SBuf(login), port, request);
434 }
435
436 /**
437 * Update request with parsed URI data. If the request arg is
438 * non-NULL, put parsed values there instead of allocating a new
439 * HttpRequest.
440 */
441 static HttpRequest *
442 urlParseFinish(const HttpRequestMethod& method,
443 const AnyP::ProtocolType protocol,
444 const char *const urlpath,
445 const char *const host,
446 const SBuf &login,
447 const int port,
448 HttpRequest *request)
449 {
450 if (NULL == request)
451 request = new HttpRequest(method, protocol, urlpath);
452 else {
453 request->initHTTP(method, protocol, urlpath);
454 safe_free(request->canonical);
455 }
456
457 request->SetHost(host);
458 request->url.userInfo(login);
459 request->port = (unsigned short) port;
460 return request;
461 }
462
463 static HttpRequest *
464 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
465 {
466 debugs(50, 5, "urnParse: " << urn);
467 if (request) {
468 request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
469 safe_free(request->canonical);
470 return request;
471 }
472
473 return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
474 }
475
476 const char *
477 urlCanonical(HttpRequest * request)
478 {
479 LOCAL_ARRAY(char, portbuf, 32);
480 LOCAL_ARRAY(char, urlbuf, MAX_URL);
481
482 if (request->canonical)
483 return request->canonical;
484
485 if (request->url.getScheme() == AnyP::PROTO_URN) {
486 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
487 SQUIDSTRINGPRINT(request->urlpath));
488 } else {
489 switch (request->method.id()) {
490
491 case Http::METHOD_CONNECT:
492 snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
493 break;
494
495 default: {
496 portbuf[0] = '\0';
497
498 if (request->port != urlDefaultPort(request->url.getScheme()))
499 snprintf(portbuf, 32, ":%d", request->port);
500
501 snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s%s" SQUIDSTRINGPH,
502 request->url.getScheme().c_str(),
503 SQUIDSBUFPRINT(request->url.userInfo()),
504 !request->url.userInfo().isEmpty() ? "@" : "",
505 request->GetHost(),
506 portbuf,
507 SQUIDSTRINGPRINT(request->urlpath));
508 }
509 }
510 }
511
512 return (request->canonical = xstrdup(urlbuf));
513 }
514
515 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
516 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
517 * and never copy the query-string part in the first place
518 */
519 char *
520 urlCanonicalClean(const HttpRequest * request)
521 {
522 LOCAL_ARRAY(char, buf, MAX_URL);
523 LOCAL_ARRAY(char, portbuf, 32);
524 char *t;
525
526 if (request->url.getScheme() == AnyP::PROTO_URN) {
527 snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
528 SQUIDSTRINGPRINT(request->urlpath));
529 } else {
530 switch (request->method.id()) {
531
532 case Http::METHOD_CONNECT:
533 snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
534 break;
535
536 default: {
537 portbuf[0] = '\0';
538
539 if (request->port != urlDefaultPort(request->url.getScheme()))
540 snprintf(portbuf, 32, ":%d", request->port);
541
542 snprintf(buf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s%s" SQUIDSTRINGPH,
543 request->url.getScheme().c_str(),
544 SQUIDSBUFPRINT(request->url.userInfo()),
545 (request->url.userInfo().isEmpty() ? "" : "@"),
546 request->GetHost(),
547 portbuf,
548 SQUIDSTRINGPRINT(request->urlpath));
549
550 // strip arguments AFTER a question-mark
551 if (Config.onoff.strip_query_terms)
552 if ((t = strchr(buf, '?')))
553 *(++t) = '\0';
554 }
555 } // switch
556 }
557
558 if (stringHasCntl(buf))
559 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
560
561 return buf;
562 }
563
564 /**
565 * Yet another alternative to urlCanonical.
566 * This one adds the https:// parts to Http::METHOD_CONNECT URL
567 * for use in error page outputs.
568 * Luckily we can leverage the others instead of duplicating.
569 */
570 const char *
571 urlCanonicalFakeHttps(const HttpRequest * request)
572 {
573 LOCAL_ARRAY(char, buf, MAX_URL);
574
575 // method CONNECT and port HTTPS
576 if (request->method == Http::METHOD_CONNECT && request->port == 443) {
577 snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
578 return buf;
579 }
580
581 // else do the normal complete canonical thing.
582 return urlCanonicalClean(request);
583 }
584
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'. The URL is therefore scanned up to the first ':'
 * or '/': stopping on ':' means "not relative", stopping on '/' or
 * reaching the end of the string means "relative".
 *
 * A NULL or empty url is reported as not relative.
 */
bool
urlIsRelative(const char *url)
{
    if (url == NULL || *url == '\0')
        return false;

    const char *stop = url + strcspn(url, ":/");

    return *stop != ':';
}
610
611 /*
612 * Convert a relative URL to an absolute URL using the context of a given
613 * request.
614 *
615 * It is assumed that you have already ensured that the URL is relative.
616 *
617 * If NULL is returned it is an indication that the method in use in the
618 * request does not distinguish between relative and absolute and you should
619 * use the url unchanged.
620 *
621 * If non-NULL is returned, it is up to the caller to free the resulting
622 * memory using safe_free().
623 */
624 char *
625 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
626 {
627
628 if (req->method.id() == Http::METHOD_CONNECT) {
629 return (NULL);
630 }
631
632 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
633
634 if (req->url.getScheme() == AnyP::PROTO_URN) {
635 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
636 SQUIDSTRINGPRINT(req->urlpath));
637 return (urlbuf);
638 }
639
640 size_t urllen;
641
642 if (req->port != urlDefaultPort(req->url.getScheme())) {
643 urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s:%d",
644 req->url.getScheme().c_str(),
645 SQUIDSBUFPRINT(req->url.userInfo()),
646 !req->url.userInfo().isEmpty() ? "@" : "",
647 req->GetHost(),
648 req->port
649 );
650 } else {
651 urllen = snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s%s",
652 req->url.getScheme().c_str(),
653 SQUIDSBUFPRINT(req->url.userInfo()),
654 !req->url.userInfo().isEmpty() ? "@" : "",
655 req->GetHost()
656 );
657 }
658
659 if (relUrl[0] == '/') {
660 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
661 } else {
662 const char *path = req->urlpath.termedBuf();
663 const char *last_slash = strrchr(path, '/');
664
665 if (last_slash == NULL) {
666 urlbuf[urllen] = '/';
667 ++urllen;
668 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
669 } else {
670 ++last_slash;
671 size_t pathlen = last_slash - path;
672 if (pathlen > MAX_URL - urllen - 1) {
673 pathlen = MAX_URL - urllen - 1;
674 }
675 strncpy(&urlbuf[urllen], path, pathlen);
676 urllen += pathlen;
677 if (urllen + 1 < MAX_URL) {
678 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
679 }
680 }
681 }
682
683 return (urlbuf);
684 }
685
686 /*
687 * matchDomainName() compares a hostname with a domainname according
688 * to the following rules:
689 *
690 * HOST DOMAIN MATCH?
691 * ------------- ------------- ------
692 * foo.com foo.com YES
693 * .foo.com foo.com YES
694 * x.foo.com foo.com NO
695 * foo.com .foo.com YES
696 * .foo.com .foo.com YES
697 * x.foo.com .foo.com YES
698 *
699 * We strip leading dots on hosts (but not domains!) so that
700 * ".foo.com" is is always the same as "foo.com".
701 *
702 * Return values:
703 * 0 means the host matches the domain
704 * 1 means the host is greater than the domain
705 * -1 means the host is less than the domain
706 */
707
708 int
709 matchDomainName(const char *h, const char *d)
710 {
711 int dl;
712 int hl;
713
714 while ('.' == *h)
715 ++h;
716
717 hl = strlen(h);
718
719 dl = strlen(d);
720
721 /*
722 * Start at the ends of the two strings and work towards the
723 * beginning.
724 */
725 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
726 if (hl == 0 && dl == 0) {
727 /*
728 * We made it all the way to the beginning of both
729 * strings without finding any difference.
730 */
731 return 0;
732 }
733
734 if (0 == hl) {
735 /*
736 * The host string is shorter than the domain string.
737 * There is only one case when this can be a match.
738 * If the domain is just one character longer, and if
739 * that character is a leading '.' then we call it a
740 * match.
741 */
742
743 if (1 == dl && '.' == d[0])
744 return 0;
745 else
746 return -1;
747 }
748
749 if (0 == dl) {
750 /*
751 * The domain string is shorter than the host string.
752 * This is a match only if the first domain character
753 * is a leading '.'.
754 */
755
756 if ('.' == d[0])
757 return 0;
758 else
759 return 1;
760 }
761 }
762
763 /*
764 * We found different characters in the same position (from the end).
765 */
766 /*
767 * If one of those character is '.' then its special. In order
768 * for splay tree sorting to work properly, "x-foo.com" must
769 * be greater than ".foo.com" even though '-' is less than '.'.
770 */
771 if ('.' == d[dl])
772 return 1;
773
774 if ('.' == h[hl])
775 return -1;
776
777 return (xtolower(h[hl]) - xtolower(d[dl]));
778 }
779
780 /*
781 * return true if we can serve requests for this method.
782 */
783 int
784 urlCheckRequest(const HttpRequest * r)
785 {
786 int rc = 0;
787 /* protocol "independent" methods
788 *
789 * actually these methods are specific to HTTP:
790 * they are methods we recieve on our HTTP port,
791 * and if we had a FTP listener would not be relevant
792 * there.
793 *
794 * So, we should delegate them to HTTP. The problem is that we
795 * do not have a default protocol from the client side of HTTP.
796 */
797
798 if (r->method == Http::METHOD_CONNECT)
799 return 1;
800
801 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
802 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
803 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
804 return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || URL::Asterisk().cmp(r->urlpath.rawBuf(), r->urlpath.size()) != 0);
805
806 if (r->method == Http::METHOD_PURGE)
807 return 1;
808
809 /* does method match the protocol? */
810 switch (r->url.getScheme()) {
811
812 case AnyP::PROTO_URN:
813
814 case AnyP::PROTO_HTTP:
815
816 case AnyP::PROTO_CACHE_OBJECT:
817 rc = 1;
818 break;
819
820 case AnyP::PROTO_FTP:
821
822 if (r->method == Http::METHOD_PUT)
823 rc = 1;
824
825 case AnyP::PROTO_GOPHER:
826
827 case AnyP::PROTO_WAIS:
828
829 case AnyP::PROTO_WHOIS:
830 if (r->method == Http::METHOD_GET)
831 rc = 1;
832 else if (r->method == Http::METHOD_HEAD)
833 rc = 1;
834
835 break;
836
837 case AnyP::PROTO_HTTPS:
838 #if USE_OPENSSL
839
840 rc = 1;
841
842 break;
843
844 #else
845 /*
846 * Squid can't originate an SSL connection, so it should
847 * never receive an "https:" URL. It should always be
848 * CONNECT instead.
849 */
850 rc = 0;
851
852 #endif
853
854 default:
855 break;
856 }
857
858 return rc;
859 }
860
861 /*
862 * Quick-n-dirty host extraction from a URL. Steps:
863 * Look for a colon
864 * Skip any '/' after the colon
865 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
866 * Look for an ending '/' or ':' and terminate
867 * Look for login info preceeded by '@'
868 */
869
870 class URLHostName
871 {
872
873 public:
874 char * extract(char const *url);
875
876 private:
877 static char Host [SQUIDHOSTNAMELEN];
878 void init(char const *);
879 void findHostStart();
880 void trimTrailingChars();
881 void trimAuth();
882 char const *hostStart;
883 char const *url;
884 };
885
886 char *
887 urlHostname(const char *url)
888 {
889 return URLHostName().extract(url);
890 }
891
892 char URLHostName::Host[SQUIDHOSTNAMELEN];
893
894 void
895 URLHostName::init(char const *aUrl)
896 {
897 Host[0] = '\0';
898 url = aUrl;
899 }
900
901 void
902 URLHostName::findHostStart()
903 {
904 if (NULL == (hostStart = strchr(url, ':')))
905 return;
906
907 ++hostStart;
908
909 while (*hostStart != '\0' && *hostStart == '/')
910 ++hostStart;
911
912 if (*hostStart == ']')
913 ++hostStart;
914 }
915
916 void
917 URLHostName::trimTrailingChars()
918 {
919 char *t;
920
921 if ((t = strchr(Host, '/')))
922 *t = '\0';
923
924 if ((t = strrchr(Host, ':')))
925 *t = '\0';
926
927 if ((t = strchr(Host, ']')))
928 *t = '\0';
929 }
930
931 void
932 URLHostName::trimAuth()
933 {
934 char *t;
935
936 if ((t = strrchr(Host, '@'))) {
937 ++t;
938 memmove(Host, t, strlen(t) + 1);
939 }
940 }
941
942 char *
943 URLHostName::extract(char const *aUrl)
944 {
945 init(aUrl);
946 findHostStart();
947
948 if (hostStart == NULL)
949 return NULL;
950
951 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
952
953 trimTrailingChars();
954
955 trimAuth();
956
957 return Host;
958 }
959