]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Merge from trunk
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * DEBUG: section 23 URL Parsing
4 * AUTHOR: Duane Wessels
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #include "squid.h"
35 #include "globals.h"
36 #include "HttpRequest.h"
37 #include "rfc1738.h"
38 #include "SquidConfig.h"
39 #include "SquidString.h"
40 #include "URL.h"
41
42 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
43 const AnyP::ProtocolType protocol,
44 const char *const urlpath,
45 const char *const host,
46 const char *const login,
47 const int port,
48 HttpRequest *request);
49 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/* Hostname alphabet used by urlParse() when Config.onoff.allow_underscore is set. */
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._"
    "[:]";

/* Hostname alphabet used by urlParse() otherwise: same as above minus '_'. */
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-."
    "[:]";
62
63 void
64 urlInitialize(void)
65 {
66 debugs(23, 5, "urlInitialize: Initializing...");
67 /* this ensures that the number of protocol strings is the same as
68 * the enum slots allocated because the last enum is always 'MAX'.
69 */
70 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
71 /*
72 * These test that our matchDomainName() function works the
73 * way we expect it to.
74 */
75 assert(0 == matchDomainName("foo.com", "foo.com"));
76 assert(0 == matchDomainName(".foo.com", "foo.com"));
77 assert(0 == matchDomainName("foo.com", ".foo.com"));
78 assert(0 == matchDomainName(".foo.com", ".foo.com"));
79 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
80 assert(0 != matchDomainName("x.foo.com", "foo.com"));
81 assert(0 != matchDomainName("foo.com", "x.foo.com"));
82 assert(0 != matchDomainName("bar.com", "foo.com"));
83 assert(0 != matchDomainName(".bar.com", "foo.com"));
84 assert(0 != matchDomainName(".bar.com", ".foo.com"));
85 assert(0 != matchDomainName("bar.com", ".foo.com"));
86 assert(0 < matchDomainName("zzz.com", "foo.com"));
87 assert(0 > matchDomainName("aaa.com", "foo.com"));
88 assert(0 == matchDomainName("FOO.com", "foo.COM"));
89 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
90 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
91 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
92 /* more cases? */
93 }
94
95 /**
96 * urlParseProtocol() takes begin (b) and end (e) pointers, but for
97 * backwards compatibility, e defaults to NULL, in which case we
98 * assume b is NULL-terminated.
99 */
100 AnyP::ProtocolType
101 urlParseProtocol(const char *b, const char *e)
102 {
103 /*
104 * if e is NULL, b must be NULL terminated and we
105 * make e point to the first whitespace character
106 * after b.
107 */
108
109 if (NULL == e)
110 e = b + strcspn(b, ":");
111
112 int len = e - b;
113
114 /* test common stuff first */
115
116 if (strncasecmp(b, "http", len) == 0)
117 return AnyP::PROTO_HTTP;
118
119 if (strncasecmp(b, "ftp", len) == 0)
120 return AnyP::PROTO_FTP;
121
122 if (strncasecmp(b, "https", len) == 0)
123 return AnyP::PROTO_HTTPS;
124
125 if (strncasecmp(b, "file", len) == 0)
126 return AnyP::PROTO_FTP;
127
128 if (strncasecmp(b, "coap", len) == 0)
129 return AnyP::PROTO_COAP;
130
131 if (strncasecmp(b, "coaps", len) == 0)
132 return AnyP::PROTO_COAPS;
133
134 if (strncasecmp(b, "gopher", len) == 0)
135 return AnyP::PROTO_GOPHER;
136
137 if (strncasecmp(b, "wais", len) == 0)
138 return AnyP::PROTO_WAIS;
139
140 if (strncasecmp(b, "cache_object", len) == 0)
141 return AnyP::PROTO_CACHE_OBJECT;
142
143 if (strncasecmp(b, "urn", len) == 0)
144 return AnyP::PROTO_URN;
145
146 if (strncasecmp(b, "whois", len) == 0)
147 return AnyP::PROTO_WHOIS;
148
149 return AnyP::PROTO_NONE;
150 }
151
152 int
153 urlDefaultPort(AnyP::ProtocolType p)
154 {
155 switch (p) {
156
157 case AnyP::PROTO_HTTP:
158 return 80;
159
160 case AnyP::PROTO_HTTPS:
161 return 443;
162
163 case AnyP::PROTO_FTP:
164 return 21;
165
166 case AnyP::PROTO_COAP:
167 case AnyP::PROTO_COAPS:
168 // coaps:// default is TBA as of draft-ietf-core-coap-08.
169 // Assuming IANA policy of allocating same port for base and TLS protocol versions will occur.
170 return 5683;
171
172 case AnyP::PROTO_GOPHER:
173 return 70;
174
175 case AnyP::PROTO_WAIS:
176 return 210;
177
178 case AnyP::PROTO_CACHE_OBJECT:
179 return CACHE_HTTP_PORT;
180
181 case AnyP::PROTO_WHOIS:
182 return 43;
183
184 default:
185 return 0;
186 }
187 }
188
189 /*
190 * Parse a URI/URL.
191 *
192 * If the 'request' arg is non-NULL, put parsed values there instead
193 * of allocating a new HttpRequest.
194 *
195 * This abuses HttpRequest as a way of representing the parsed url
196 * and its components.
197 * method is used to switch parsers and to init the HttpRequest.
198 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
199 * looked for.
200 * The url is non const so that if its too long we can NULL-terminate it in place.
201 */
202
203 /*
204 * This routine parses a URL. Its assumed that the URL is complete -
205 * ie, the end of the string is the end of the URL. Don't pass a partial
206 * URL here as this routine doesn't have any way of knowing whether
207 * its partial or not (ie, it handles the case of no trailing slash as
208 * being "end of host with implied path of /".
209 */
210 HttpRequest *
211 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
212 {
213 LOCAL_ARRAY(char, proto, MAX_URL);
214 LOCAL_ARRAY(char, login, MAX_URL);
215 LOCAL_ARRAY(char, host, MAX_URL);
216 LOCAL_ARRAY(char, urlpath, MAX_URL);
217 char *t = NULL;
218 char *q = NULL;
219 int port;
220 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
221 int l;
222 int i;
223 const char *src;
224 char *dst;
225 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
226
227 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
228 /* terminate so it doesn't overflow other buffers */
229 *(url + (MAX_URL >> 1)) = '\0';
230 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
231 return NULL;
232 }
233 if (method == Http::METHOD_CONNECT) {
234 port = CONNECT_PORT;
235
236 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
237 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
238 return NULL;
239
240 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
241 strcmp(url, "*") == 0) {
242 protocol = AnyP::PROTO_HTTP;
243 port = urlDefaultPort(protocol);
244 return urlParseFinish(method, protocol, url, host, login, port, request);
245 } else if (!strncmp(url, "urn:", 4)) {
246 return urnParse(method, url, request);
247 } else {
248 /* Parse the URL: */
249 src = url;
250 i = 0;
251 /* Find first : - everything before is protocol */
252 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
253 *dst = *src;
254 }
255 if (i >= l)
256 return NULL;
257 *dst = '\0';
258
259 /* Then its :// */
260 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
261 return NULL;
262 i += 3;
263 src += 3;
264
265 /* Then everything until first /; thats host (and port; which we'll look for here later) */
266 // bug 1881: If we don't get a "/" then we imply it was there
267 // bug 3074: We could just be given a "?" or "#". These also imply "/"
268 // bug 3233: whitespace is also a hostname delimiter.
269 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
270 *dst = *src;
271 }
272
273 /*
274 * We can't check for "i >= l" here because we could be at the end of the line
275 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
276 * been -given- a valid URL and the path is just '/'.
277 */
278 if (i > l)
279 return NULL;
280 *dst = '\0';
281
282 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
283 if (*src == '?' || *src == '#' || *src == '\0') {
284 urlpath[0] = '/';
285 dst = &urlpath[1];
286 } else {
287 dst = urlpath;
288 }
289 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
290 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
291 *dst = *src;
292 }
293
294 /* We -could- be at the end of the buffer here */
295 if (i > l)
296 return NULL;
297 /* If the URL path is empty we set it to be "/" */
298 if (dst == urlpath) {
299 *dst = '/';
300 ++dst;
301 }
302 *dst = '\0';
303
304 protocol = urlParseProtocol(proto);
305 port = urlDefaultPort(protocol);
306
307 /* Is there any login information? (we should eventually parse it above) */
308 t = strrchr(host, '@');
309 if (t != NULL) {
310 strncpy((char *) login, (char *) host, sizeof(login)-1);
311 login[sizeof(login)-1] = '\0';
312 t = strrchr(login, '@');
313 *t = 0;
314 strncpy((char *) host, t + 1, sizeof(host)-1);
315 host[sizeof(host)-1] = '\0';
316 }
317
318 /* Is there any host information? (we should eventually parse it above) */
319 if (*host == '[') {
320 /* strip any IPA brackets. valid under IPv6. */
321 dst = host;
322 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
323 src = host;
324 ++src;
325 l = strlen(host);
326 i = 1;
327 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
328 *dst = *src;
329 }
330
331 /* we moved in-place, so truncate the actual hostname found */
332 *dst = '\0';
333 ++dst;
334
335 /* skip ahead to either start of port, or original EOS */
336 while (*dst != '\0' && *dst != ':')
337 ++dst;
338 t = dst;
339 } else {
340 t = strrchr(host, ':');
341
342 if (t != strchr(host,':') ) {
343 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
344 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
345 /* therefore we MUST accept the case where they are not bracketed at all. */
346 t = NULL;
347 }
348 }
349
350 // Bug 3183 sanity check: If scheme is present, host must be too.
351 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
352 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
353 return NULL;
354 }
355
356 if (t && *t == ':') {
357 *t = '\0';
358 ++t;
359 port = atoi(t);
360 }
361 }
362
363 for (t = host; *t; ++t)
364 *t = xtolower(*t);
365
366 if (stringHasWhitespace(host)) {
367 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
368 t = q = host;
369 while (*t) {
370 if (!xisspace(*t)) {
371 *q = *t;
372 ++q;
373 }
374 ++t;
375 }
376 *q = '\0';
377 }
378 }
379
380 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
381
382 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
383 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
384 return NULL;
385 }
386
387 /* For IPV6 addresses also check for a colon */
388 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
389 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
390
391 /* remove trailing dots from hostnames */
392 while ((l = strlen(host)) > 0 && host[--l] == '.')
393 host[l] = '\0';
394
395 /* reject duplicate or leading dots */
396 if (strstr(host, "..") || *host == '.') {
397 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
398 return NULL;
399 }
400
401 if (port < 1 || port > 65535) {
402 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
403 return NULL;
404 }
405
406 #if HARDCODE_DENY_PORTS
407 /* These ports are filtered in the default squid.conf, but
408 * maybe someone wants them hardcoded... */
409 if (port == 7 || port == 9 || port == 19) {
410 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
411 return NULL;
412 }
413 #endif
414
415 if (stringHasWhitespace(urlpath)) {
416 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
417
418 switch (Config.uri_whitespace) {
419
420 case URI_WHITESPACE_DENY:
421 return NULL;
422
423 case URI_WHITESPACE_ALLOW:
424 break;
425
426 case URI_WHITESPACE_ENCODE:
427 t = rfc1738_escape_unescaped(urlpath);
428 xstrncpy(urlpath, t, MAX_URL);
429 break;
430
431 case URI_WHITESPACE_CHOP:
432 *(urlpath + strcspn(urlpath, w_space)) = '\0';
433 break;
434
435 case URI_WHITESPACE_STRIP:
436 default:
437 t = q = urlpath;
438 while (*t) {
439 if (!xisspace(*t)) {
440 *q = *t;
441 ++q;
442 }
443 ++t;
444 }
445 *q = '\0';
446 }
447 }
448
449 return urlParseFinish(method, protocol, urlpath, host, login, port, request);
450 }
451
452 /**
453 * Update request with parsed URI data. If the request arg is
454 * non-NULL, put parsed values there instead of allocating a new
455 * HttpRequest.
456 */
457 static HttpRequest *
458 urlParseFinish(const HttpRequestMethod& method,
459 const AnyP::ProtocolType protocol,
460 const char *const urlpath,
461 const char *const host,
462 const char *const login,
463 const int port,
464 HttpRequest *request)
465 {
466 if (NULL == request)
467 request = new HttpRequest(method, protocol, urlpath);
468 else {
469 request->initHTTP(method, protocol, urlpath);
470 safe_free(request->canonical);
471 }
472
473 request->SetHost(host);
474 xstrncpy(request->login, login, MAX_LOGIN_SZ);
475 request->port = (unsigned short) port;
476 return request;
477 }
478
479 static HttpRequest *
480 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
481 {
482 debugs(50, 5, "urnParse: " << urn);
483 if (request) {
484 request->initHTTP(method, AnyP::PROTO_URN, urn + 4);
485 safe_free(request->canonical);
486 return request;
487 }
488
489 return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
490 }
491
492 const char *
493 urlCanonical(HttpRequest * request)
494 {
495 LOCAL_ARRAY(char, portbuf, 32);
496 LOCAL_ARRAY(char, urlbuf, MAX_URL);
497
498 if (request->canonical)
499 return request->canonical;
500
501 if (request->url.getScheme() == AnyP::PROTO_URN) {
502 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
503 SQUIDSTRINGPRINT(request->urlpath));
504 } else {
505 switch (request->method.id()) {
506
507 case Http::METHOD_CONNECT:
508 snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
509 break;
510
511 default:
512 {
513 portbuf[0] = '\0';
514
515 if (request->port != urlDefaultPort(request->url.getScheme()))
516 snprintf(portbuf, 32, ":%d", request->port);
517
518 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
519 request->url.getScheme().c_str(),
520 request->login,
521 *request->login ? "@" : null_string,
522 request->GetHost(),
523 portbuf,
524 SQUIDSTRINGPRINT(request->urlpath));
525 }
526 }
527 }
528
529 return (request->canonical = xstrdup(urlbuf));
530 }
531
532 /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
533 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
534 * and never copy the query-string part in the first place
535 */
536 char *
537 urlCanonicalClean(const HttpRequest * request)
538 {
539 LOCAL_ARRAY(char, buf, MAX_URL);
540 LOCAL_ARRAY(char, portbuf, 32);
541 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
542 char *t;
543
544 if (request->url.getScheme() == AnyP::PROTO_URN) {
545 snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
546 SQUIDSTRINGPRINT(request->urlpath));
547 } else {
548 switch (request->method.id()) {
549
550 case Http::METHOD_CONNECT:
551 snprintf(buf, MAX_URL, "%s:%d", request->GetHost(), request->port);
552 break;
553
554 default:
555 {
556 portbuf[0] = '\0';
557
558 if (request->port != urlDefaultPort(request->url.getScheme()))
559 snprintf(portbuf, 32, ":%d", request->port);
560
561 loginbuf[0] = '\0';
562
563 if ((int) strlen(request->login) > 0) {
564 strcpy(loginbuf, request->login);
565
566 if ((t = strchr(loginbuf, ':')))
567 *t = '\0';
568
569 strcat(loginbuf, "@");
570 }
571
572 snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
573 request->url.getScheme().c_str(),
574 loginbuf,
575 request->GetHost(),
576 portbuf,
577 SQUIDSTRINGPRINT(request->urlpath));
578
579 // strip arguments AFTER a question-mark
580 if (Config.onoff.strip_query_terms)
581 if ((t = strchr(buf, '?')))
582 *(++t) = '\0';
583 }
584 }
585 }
586
587 if (stringHasCntl(buf))
588 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
589
590 return buf;
591 }
592
593 /**
594 * Yet another alternative to urlCanonical.
595 * This one adds the https:// parts to Http::METHOD_CONNECT URL
596 * for use in error page outputs.
597 * Luckily we can leverage the others instead of duplicating.
598 */
599 const char *
600 urlCanonicalFakeHttps(const HttpRequest * request)
601 {
602 LOCAL_ARRAY(char, buf, MAX_URL);
603
604 // method CONNECT and port HTTPS
605 if (request->method == Http::METHOD_CONNECT && request->port == 443) {
606 snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
607 return buf;
608 }
609
610 // else do the normal complete canonical thing.
611 return urlCanonicalClean(request);
612 }
613
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'. The URL is therefore scanned up to the first ':' or
 * '/', and it is relative unless that character is a ':'.
 *
 * NULL and empty strings are reported as not relative.
 */
bool
urlIsRelative(const char *url)
{
    if (url == NULL || *url == '\0')
        return false;

    /* first ':' or '/', or the terminating NUL when neither occurs */
    const char *const stop = url + strcspn(url, ":/");

    return *stop != ':';
}
639
640 /*
641 * Convert a relative URL to an absolute URL using the context of a given
642 * request.
643 *
644 * It is assumed that you have already ensured that the URL is relative.
645 *
646 * If NULL is returned it is an indication that the method in use in the
647 * request does not distinguish between relative and absolute and you should
648 * use the url unchanged.
649 *
650 * If non-NULL is returned, it is up to the caller to free the resulting
651 * memory using safe_free().
652 */
653 char *
654 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
655 {
656
657 if (req->method.id() == Http::METHOD_CONNECT) {
658 return (NULL);
659 }
660
661 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
662
663 if (req->url.getScheme() == AnyP::PROTO_URN) {
664 snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
665 SQUIDSTRINGPRINT(req->urlpath));
666 return (urlbuf);
667 }
668
669 size_t urllen;
670
671 if (req->port != urlDefaultPort(req->url.getScheme())) {
672 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
673 req->url.getScheme().c_str(),
674 req->login,
675 *req->login ? "@" : null_string,
676 req->GetHost(),
677 req->port
678 );
679 } else {
680 urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
681 req->url.getScheme().c_str(),
682 req->login,
683 *req->login ? "@" : null_string,
684 req->GetHost()
685 );
686 }
687
688 if (relUrl[0] == '/') {
689 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
690 } else {
691 const char *path = req->urlpath.termedBuf();
692 const char *last_slash = strrchr(path, '/');
693
694 if (last_slash == NULL) {
695 urlbuf[urllen] = '/';
696 ++urllen;
697 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
698 } else {
699 ++last_slash;
700 size_t pathlen = last_slash - path;
701 if (pathlen > MAX_URL - urllen - 1) {
702 pathlen = MAX_URL - urllen - 1;
703 }
704 strncpy(&urlbuf[urllen], path, pathlen);
705 urllen += pathlen;
706 if (urllen + 1 < MAX_URL) {
707 strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
708 }
709 }
710 }
711
712 return (urlbuf);
713 }
714
715 /*
716 * matchDomainName() compares a hostname with a domainname according
717 * to the following rules:
718 *
719 * HOST DOMAIN MATCH?
720 * ------------- ------------- ------
721 * foo.com foo.com YES
722 * .foo.com foo.com YES
723 * x.foo.com foo.com NO
724 * foo.com .foo.com YES
725 * .foo.com .foo.com YES
726 * x.foo.com .foo.com YES
727 *
728 * We strip leading dots on hosts (but not domains!) so that
729 * ".foo.com" is is always the same as "foo.com".
730 *
731 * Return values:
732 * 0 means the host matches the domain
733 * 1 means the host is greater than the domain
734 * -1 means the host is less than the domain
735 */
736
737 int
738 matchDomainName(const char *h, const char *d)
739 {
740 int dl;
741 int hl;
742
743 while ('.' == *h)
744 ++h;
745
746 hl = strlen(h);
747
748 dl = strlen(d);
749
750 /*
751 * Start at the ends of the two strings and work towards the
752 * beginning.
753 */
754 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
755 if (hl == 0 && dl == 0) {
756 /*
757 * We made it all the way to the beginning of both
758 * strings without finding any difference.
759 */
760 return 0;
761 }
762
763 if (0 == hl) {
764 /*
765 * The host string is shorter than the domain string.
766 * There is only one case when this can be a match.
767 * If the domain is just one character longer, and if
768 * that character is a leading '.' then we call it a
769 * match.
770 */
771
772 if (1 == dl && '.' == d[0])
773 return 0;
774 else
775 return -1;
776 }
777
778 if (0 == dl) {
779 /*
780 * The domain string is shorter than the host string.
781 * This is a match only if the first domain character
782 * is a leading '.'.
783 */
784
785 if ('.' == d[0])
786 return 0;
787 else
788 return 1;
789 }
790 }
791
792 /*
793 * We found different characters in the same position (from the end).
794 */
795 /*
796 * If one of those character is '.' then its special. In order
797 * for splay tree sorting to work properly, "x-foo.com" must
798 * be greater than ".foo.com" even though '-' is less than '.'.
799 */
800 if ('.' == d[dl])
801 return 1;
802
803 if ('.' == h[hl])
804 return -1;
805
806 return (xtolower(h[hl]) - xtolower(d[dl]));
807 }
808
809 /*
810 * return true if we can serve requests for this method.
811 */
812 int
813 urlCheckRequest(const HttpRequest * r)
814 {
815 int rc = 0;
816 /* protocol "independent" methods
817 *
818 * actually these methods are specific to HTTP:
819 * they are methods we recieve on our HTTP port,
820 * and if we had a FTP listener would not be relevant
821 * there.
822 *
823 * So, we should delegate them to HTTP. The problem is that we
824 * do not have a default protocol from the client side of HTTP.
825 */
826
827 if (r->method == Http::METHOD_CONNECT)
828 return 1;
829
830 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
831 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
832 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
833 return (r->header.getInt64(HDR_MAX_FORWARDS) == 0 || r->urlpath != "*");
834
835 if (r->method == Http::METHOD_PURGE)
836 return 1;
837
838 /* does method match the protocol? */
839 switch (r->url.getScheme()) {
840
841 case AnyP::PROTO_URN:
842
843 case AnyP::PROTO_HTTP:
844
845 case AnyP::PROTO_CACHE_OBJECT:
846 rc = 1;
847 break;
848
849 case AnyP::PROTO_FTP:
850
851 if (r->method == Http::METHOD_PUT)
852 rc = 1;
853
854 case AnyP::PROTO_GOPHER:
855
856 case AnyP::PROTO_WAIS:
857
858 case AnyP::PROTO_WHOIS:
859 if (r->method == Http::METHOD_GET)
860 rc = 1;
861 else if (r->method == Http::METHOD_HEAD)
862 rc = 1;
863
864 break;
865
866 case AnyP::PROTO_HTTPS:
867 #if USE_OPENSSL
868
869 rc = 1;
870
871 break;
872
873 #else
874 /*
875 * Squid can't originate an SSL connection, so it should
876 * never receive an "https:" URL. It should always be
877 * CONNECT instead.
878 */
879 rc = 0;
880
881 #endif
882
883 default:
884 break;
885 }
886
887 return rc;
888 }
889
890 /*
891 * Quick-n-dirty host extraction from a URL. Steps:
892 * Look for a colon
893 * Skip any '/' after the colon
894 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
895 * Look for an ending '/' or ':' and terminate
896 * Look for login info preceeded by '@'
897 */
898
899 class URLHostName
900 {
901
902 public:
903 char * extract(char const *url);
904
905 private:
906 static char Host [SQUIDHOSTNAMELEN];
907 void init(char const *);
908 void findHostStart();
909 void trimTrailingChars();
910 void trimAuth();
911 char const *hostStart;
912 char const *url;
913 };
914
915 char *
916 urlHostname(const char *url)
917 {
918 return URLHostName().extract(url);
919 }
920
921 char URLHostName::Host[SQUIDHOSTNAMELEN];
922
923 void
924 URLHostName::init(char const *aUrl)
925 {
926 Host[0] = '\0';
927 url = aUrl;
928 }
929
930 void
931 URLHostName::findHostStart()
932 {
933 if (NULL == (hostStart = strchr(url, ':')))
934 return;
935
936 ++hostStart;
937
938 while (*hostStart != '\0' && *hostStart == '/')
939 ++hostStart;
940
941 if (*hostStart == ']')
942 ++hostStart;
943 }
944
945 void
946 URLHostName::trimTrailingChars()
947 {
948 char *t;
949
950 if ((t = strchr(Host, '/')))
951 *t = '\0';
952
953 if ((t = strrchr(Host, ':')))
954 *t = '\0';
955
956 if ((t = strchr(Host, ']')))
957 *t = '\0';
958 }
959
960 void
961 URLHostName::trimAuth()
962 {
963 char *t;
964
965 if ((t = strrchr(Host, '@'))) {
966 ++t;
967 memmove(Host, t, strlen(t) + 1);
968 }
969 }
970
971 char *
972 URLHostName::extract(char const *aUrl)
973 {
974 init(aUrl);
975 findHostStart();
976
977 if (hostStart == NULL)
978 return NULL;
979
980 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
981
982 trimTrailingChars();
983
984 trimAuth();
985
986 return Host;
987 }