]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / url.cc
1 /*
2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /* DEBUG: section 23 URL Parsing */
10
11 #include "squid.h"
12 #include "globals.h"
13 #include "HttpRequest.h"
14 #include "rfc1738.h"
15 #include "SquidConfig.h"
16 #include "SquidString.h"
17 #include "URL.h"
18
19 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
20 const AnyP::ProtocolType protocol,
21 const char *const protoStr,
22 const char *const urlpath,
23 const char *const host,
24 const SBuf &login,
25 const int port,
26 HttpRequest *request);
27 static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
/// Characters accepted in a hostname when underscores are allowed
/// (selected by urlParse() when Config.onoff.allow_underscore is set).
static const char valid_hostname_chars_u[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-._"
    "[:]"
    ;

/// Characters accepted in a hostname when underscores are not allowed.
static const char valid_hostname_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
    "[:]"
    ;
40
41 const SBuf &
42 URL::Asterisk()
43 {
44 static SBuf star("*");
45 return star;
46 }
47
48 const SBuf &
49 URL::SlashPath()
50 {
51 static SBuf slash("/");
52 return slash;
53 }
54
55 void
56 URL::host(const char *src)
57 {
58 hostAddr_.setEmpty();
59 hostAddr_ = src;
60 if (hostAddr_.isAnyAddr()) {
61 xstrncpy(host_, src, sizeof(host_));
62 hostIsNumeric_ = false;
63 } else {
64 hostAddr_.toHostStr(host_, sizeof(host_));
65 debugs(23, 3, "given IP: " << hostAddr_);
66 hostIsNumeric_ = 1;
67 }
68 touch();
69 }
70
71 const SBuf &
72 URL::path() const
73 {
74 // RFC 3986 section 3.3 says path can be empty (path-abempty).
75 // RFC 7230 sections 2.7.3, 5.3.1, 5.7.2 - says path cannot be empty, default to "/"
76 // at least when sending and using. We must still accept path-abempty as input.
77 if (path_.isEmpty() && (scheme_ == AnyP::PROTO_HTTP || scheme_ == AnyP::PROTO_HTTPS))
78 return SlashPath();
79
80 return path_;
81 }
82
83 void
84 urlInitialize(void)
85 {
86 debugs(23, 5, "urlInitialize: Initializing...");
87 /* this ensures that the number of protocol strings is the same as
88 * the enum slots allocated because the last enum is always 'MAX'.
89 */
90 assert(strcmp(AnyP::ProtocolType_str[AnyP::PROTO_MAX], "MAX") == 0);
91 /*
92 * These test that our matchDomainName() function works the
93 * way we expect it to.
94 */
95 assert(0 == matchDomainName("foo.com", "foo.com"));
96 assert(0 == matchDomainName(".foo.com", "foo.com"));
97 assert(0 == matchDomainName("foo.com", ".foo.com"));
98 assert(0 == matchDomainName(".foo.com", ".foo.com"));
99 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
100 assert(0 == matchDomainName("y.x.foo.com", ".foo.com"));
101 assert(0 != matchDomainName("x.foo.com", "foo.com"));
102 assert(0 != matchDomainName("foo.com", "x.foo.com"));
103 assert(0 != matchDomainName("bar.com", "foo.com"));
104 assert(0 != matchDomainName(".bar.com", "foo.com"));
105 assert(0 != matchDomainName(".bar.com", ".foo.com"));
106 assert(0 != matchDomainName("bar.com", ".foo.com"));
107 assert(0 < matchDomainName("zzz.com", "foo.com"));
108 assert(0 > matchDomainName("aaa.com", "foo.com"));
109 assert(0 == matchDomainName("FOO.com", "foo.COM"));
110 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
111 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
112 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
113
114 assert(0 == matchDomainName(".foo.com", ".foo.com", mdnRejectSubsubDomains));
115 assert(0 == matchDomainName("x.foo.com", ".foo.com", mdnRejectSubsubDomains));
116 assert(0 != matchDomainName("y.x.foo.com", ".foo.com", mdnRejectSubsubDomains));
117 assert(0 != matchDomainName(".x.foo.com", ".foo.com", mdnRejectSubsubDomains));
118
119 assert(0 == matchDomainName("*.foo.com", "x.foo.com", mdnHonorWildcards));
120 assert(0 == matchDomainName("*.foo.com", ".x.foo.com", mdnHonorWildcards));
121 assert(0 == matchDomainName("*.foo.com", ".foo.com", mdnHonorWildcards));
122 assert(0 != matchDomainName("*.foo.com", "foo.com", mdnHonorWildcards));
123
124 /* more cases? */
125 }
126
127 /**
128 * Parse the scheme name from string b, into protocol type.
129 * The string must be 0-terminated.
130 */
131 AnyP::ProtocolType
132 urlParseProtocol(const char *b)
133 {
134 // make e point to the ':' character
135 const char *e = b + strcspn(b, ":");
136 int len = e - b;
137
138 /* test common stuff first */
139
140 if (strncasecmp(b, "http", len) == 0)
141 return AnyP::PROTO_HTTP;
142
143 if (strncasecmp(b, "ftp", len) == 0)
144 return AnyP::PROTO_FTP;
145
146 if (strncasecmp(b, "https", len) == 0)
147 return AnyP::PROTO_HTTPS;
148
149 if (strncasecmp(b, "file", len) == 0)
150 return AnyP::PROTO_FTP;
151
152 if (strncasecmp(b, "coap", len) == 0)
153 return AnyP::PROTO_COAP;
154
155 if (strncasecmp(b, "coaps", len) == 0)
156 return AnyP::PROTO_COAPS;
157
158 if (strncasecmp(b, "gopher", len) == 0)
159 return AnyP::PROTO_GOPHER;
160
161 if (strncasecmp(b, "wais", len) == 0)
162 return AnyP::PROTO_WAIS;
163
164 if (strncasecmp(b, "cache_object", len) == 0)
165 return AnyP::PROTO_CACHE_OBJECT;
166
167 if (strncasecmp(b, "urn", len) == 0)
168 return AnyP::PROTO_URN;
169
170 if (strncasecmp(b, "whois", len) == 0)
171 return AnyP::PROTO_WHOIS;
172
173 if (len > 0)
174 return AnyP::PROTO_UNKNOWN;
175
176 return AnyP::PROTO_NONE;
177 }
178
179 /*
180 * Parse a URI/URL.
181 *
182 * If the 'request' arg is non-NULL, put parsed values there instead
183 * of allocating a new HttpRequest.
184 *
185 * This abuses HttpRequest as a way of representing the parsed url
186 * and its components.
187 * method is used to switch parsers and to init the HttpRequest.
188 * If method is Http::METHOD_CONNECT, then rather than a URL a hostname:port is
189 * looked for.
190 * The url is non const so that if its too long we can NULL-terminate it in place.
191 */
192
193 /*
194 * This routine parses a URL. Its assumed that the URL is complete -
195 * ie, the end of the string is the end of the URL. Don't pass a partial
196 * URL here as this routine doesn't have any way of knowing whether
197 * its partial or not (ie, it handles the case of no trailing slash as
198 * being "end of host with implied path of /".
199 */
200 HttpRequest *
201 urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
202 {
203 LOCAL_ARRAY(char, proto, MAX_URL);
204 LOCAL_ARRAY(char, login, MAX_URL);
205 LOCAL_ARRAY(char, host, MAX_URL);
206 LOCAL_ARRAY(char, urlpath, MAX_URL);
207 char *t = NULL;
208 char *q = NULL;
209 int port;
210 AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
211 int l;
212 int i;
213 const char *src;
214 char *dst;
215 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
216
217 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
218 /* terminate so it doesn't overflow other buffers */
219 *(url + (MAX_URL >> 1)) = '\0';
220 debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
221 return NULL;
222 }
223 if (method == Http::METHOD_CONNECT) {
224 port = CONNECT_PORT;
225
226 if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
227 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
228 return NULL;
229
230 } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
231 URL::Asterisk().cmp(url) == 0) {
232 protocol = AnyP::PROTO_HTTP;
233 port = 80; // or the slow way ... AnyP::UriScheme(protocol,"http").defaultPort();
234 return urlParseFinish(method, protocol, "http", url, host, SBuf(), port, request);
235 } else if (!strncmp(url, "urn:", 4)) {
236 return urnParse(method, url, request);
237 } else {
238 /* Parse the URL: */
239 src = url;
240 i = 0;
241 /* Find first : - everything before is protocol */
242 for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
243 *dst = *src;
244 }
245 if (i >= l)
246 return NULL;
247 *dst = '\0';
248
249 /* Then its :// */
250 if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
251 return NULL;
252 i += 3;
253 src += 3;
254
255 /* Then everything until first /; thats host (and port; which we'll look for here later) */
256 // bug 1881: If we don't get a "/" then we imply it was there
257 // bug 3074: We could just be given a "?" or "#". These also imply "/"
258 // bug 3233: whitespace is also a hostname delimiter.
259 for (dst = host; i < l && *src != '/' && *src != '?' && *src != '#' && *src != '\0' && !xisspace(*src); ++i, ++src, ++dst) {
260 *dst = *src;
261 }
262
263 /*
264 * We can't check for "i >= l" here because we could be at the end of the line
265 * and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
266 * been -given- a valid URL and the path is just '/'.
267 */
268 if (i > l)
269 return NULL;
270 *dst = '\0';
271
272 // bug 3074: received 'path' starting with '?', '#', or '\0' implies '/'
273 if (*src == '?' || *src == '#' || *src == '\0') {
274 urlpath[0] = '/';
275 dst = &urlpath[1];
276 } else {
277 dst = urlpath;
278 }
279 /* Then everything from / (inclusive) until \r\n or \0 - thats urlpath */
280 for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
281 *dst = *src;
282 }
283
284 /* We -could- be at the end of the buffer here */
285 if (i > l)
286 return NULL;
287 /* If the URL path is empty we set it to be "/" */
288 if (dst == urlpath) {
289 *dst = '/';
290 ++dst;
291 }
292 *dst = '\0';
293
294 protocol = urlParseProtocol(proto);
295 port = AnyP::UriScheme(protocol).defaultPort();
296
297 /* Is there any login information? (we should eventually parse it above) */
298 t = strrchr(host, '@');
299 if (t != NULL) {
300 strncpy((char *) login, (char *) host, sizeof(login)-1);
301 login[sizeof(login)-1] = '\0';
302 t = strrchr(login, '@');
303 *t = 0;
304 strncpy((char *) host, t + 1, sizeof(host)-1);
305 host[sizeof(host)-1] = '\0';
306 // Bug 4498: URL-unescape the login info after extraction
307 rfc1738_unescape(login);
308 }
309
310 /* Is there any host information? (we should eventually parse it above) */
311 if (*host == '[') {
312 /* strip any IPA brackets. valid under IPv6. */
313 dst = host;
314 /* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
315 src = host;
316 ++src;
317 l = strlen(host);
318 i = 1;
319 for (; i < l && *src != ']' && *src != '\0'; ++i, ++src, ++dst) {
320 *dst = *src;
321 }
322
323 /* we moved in-place, so truncate the actual hostname found */
324 *dst = '\0';
325 ++dst;
326
327 /* skip ahead to either start of port, or original EOS */
328 while (*dst != '\0' && *dst != ':')
329 ++dst;
330 t = dst;
331 } else {
332 t = strrchr(host, ':');
333
334 if (t != strchr(host,':') ) {
335 /* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when its not. */
336 /* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
337 /* therefore we MUST accept the case where they are not bracketed at all. */
338 t = NULL;
339 }
340 }
341
342 // Bug 3183 sanity check: If scheme is present, host must be too.
343 if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
344 debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
345 return NULL;
346 }
347
348 if (t && *t == ':') {
349 *t = '\0';
350 ++t;
351 port = atoi(t);
352 }
353 }
354
355 for (t = host; *t; ++t)
356 *t = xtolower(*t);
357
358 if (stringHasWhitespace(host)) {
359 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
360 t = q = host;
361 while (*t) {
362 if (!xisspace(*t)) {
363 *q = *t;
364 ++q;
365 }
366 ++t;
367 }
368 *q = '\0';
369 }
370 }
371
372 debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
373
374 if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
375 debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
376 return NULL;
377 }
378
379 /* For IPV6 addresses also check for a colon */
380 if (Config.appendDomain && !strchr(host, '.') && !strchr(host, ':'))
381 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
382
383 /* remove trailing dots from hostnames */
384 while ((l = strlen(host)) > 0 && host[--l] == '.')
385 host[l] = '\0';
386
387 /* reject duplicate or leading dots */
388 if (strstr(host, "..") || *host == '.') {
389 debugs(23, DBG_IMPORTANT, "urlParse: Illegal hostname '" << host << "'");
390 return NULL;
391 }
392
393 if (port < 1 || port > 65535) {
394 debugs(23, 3, "urlParse: Invalid port '" << port << "'");
395 return NULL;
396 }
397
398 #if HARDCODE_DENY_PORTS
399 /* These ports are filtered in the default squid.conf, but
400 * maybe someone wants them hardcoded... */
401 if (port == 7 || port == 9 || port == 19) {
402 debugs(23, DBG_CRITICAL, "urlParse: Deny access to port " << port);
403 return NULL;
404 }
405 #endif
406
407 if (stringHasWhitespace(urlpath)) {
408 debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
409
410 switch (Config.uri_whitespace) {
411
412 case URI_WHITESPACE_DENY:
413 return NULL;
414
415 case URI_WHITESPACE_ALLOW:
416 break;
417
418 case URI_WHITESPACE_ENCODE:
419 t = rfc1738_escape_unescaped(urlpath);
420 xstrncpy(urlpath, t, MAX_URL);
421 break;
422
423 case URI_WHITESPACE_CHOP:
424 *(urlpath + strcspn(urlpath, w_space)) = '\0';
425 break;
426
427 case URI_WHITESPACE_STRIP:
428 default:
429 t = q = urlpath;
430 while (*t) {
431 if (!xisspace(*t)) {
432 *q = *t;
433 ++q;
434 }
435 ++t;
436 }
437 *q = '\0';
438 }
439 }
440
441 return urlParseFinish(method, protocol, proto, urlpath, host, SBuf(login), port, request);
442 }
443
444 /**
445 * Update request with parsed URI data. If the request arg is
446 * non-NULL, put parsed values there instead of allocating a new
447 * HttpRequest.
448 */
449 static HttpRequest *
450 urlParseFinish(const HttpRequestMethod& method,
451 const AnyP::ProtocolType protocol,
452 const char *const protoStr, // for unknown protocols
453 const char *const urlpath,
454 const char *const host,
455 const SBuf &login,
456 const int port,
457 HttpRequest *request)
458 {
459 if (NULL == request)
460 request = new HttpRequest(method, protocol, protoStr, urlpath);
461 else {
462 request->initHTTP(method, protocol, protoStr, urlpath);
463 }
464
465 request->url.host(host);
466 request->url.userInfo(login);
467 request->url.port(port);
468 return request;
469 }
470
471 static HttpRequest *
472 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
473 {
474 debugs(50, 5, "urnParse: " << urn);
475 if (request) {
476 request->initHTTP(method, AnyP::PROTO_URN, "urn", urn + 4);
477 return request;
478 }
479
480 return new HttpRequest(method, AnyP::PROTO_URN, "urn", urn + 4);
481 }
482
483 void
484 URL::touch()
485 {
486 absolute_.clear();
487 authorityHttp_.clear();
488 authorityWithPort_.clear();
489 }
490
491 SBuf &
492 URL::authority(bool requirePort) const
493 {
494 if (authorityHttp_.isEmpty()) {
495
496 // both formats contain Host/IP
497 authorityWithPort_.append(host());
498 authorityHttp_ = authorityWithPort_;
499
500 // authorityForm_ only has :port if it is non-default
501 authorityWithPort_.appendf(":%u",port());
502 if (port() != getScheme().defaultPort())
503 authorityHttp_ = authorityWithPort_;
504 }
505
506 return requirePort ? authorityWithPort_ : authorityHttp_;
507 }
508
509 SBuf &
510 URL::absolute() const
511 {
512 if (absolute_.isEmpty()) {
513 // TODO: most URL will be much shorter, avoid allocating this much
514 absolute_.reserveCapacity(MAX_URL);
515
516 absolute_.append(getScheme().image());
517 absolute_.append(":",1);
518 if (getScheme() != AnyP::PROTO_URN) {
519 absolute_.append("//", 2);
520 const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
521 getScheme() != AnyP::PROTO_HTTPS ||
522 userInfo().isEmpty();
523 if (!omitUserInfo) {
524 absolute_.append(userInfo());
525 absolute_.append("@", 1);
526 }
527 absolute_.append(authority());
528 }
529 absolute_.append(path());
530 }
531
532 return absolute_;
533 }
534
535 /** \todo AYJ: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
536 * After copying it on in the first place! Would be less code to merge the two with a flag parameter.
537 * and never copy the query-string part in the first place
538 */
539 char *
540 urlCanonicalClean(const HttpRequest * request)
541 {
542 LOCAL_ARRAY(char, buf, MAX_URL);
543
544 snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
545 buf[sizeof(buf)-1] = '\0';
546
547 // URN, CONNECT method, and non-stripped URIs can go straight out
548 if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
549 // strip anything AFTER a question-mark
550 // leaving the '?' in place
551 if (auto t = strchr(buf, '?')) {
552 *(++t) = '\0';
553 }
554 }
555
556 if (stringHasCntl(buf))
557 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
558
559 return buf;
560 }
561
562 /**
563 * Yet another alternative to urlCanonical.
564 * This one adds the https:// parts to Http::METHOD_CONNECT URL
565 * for use in error page outputs.
566 * Luckily we can leverage the others instead of duplicating.
567 */
568 const char *
569 urlCanonicalFakeHttps(const HttpRequest * request)
570 {
571 LOCAL_ARRAY(char, buf, MAX_URL);
572
573 // method CONNECT and port HTTPS
574 if (request->method == Http::METHOD_CONNECT && request->url.port() == 443) {
575 snprintf(buf, MAX_URL, "https://%s/*", request->url.host());
576 return buf;
577 }
578
579 // else do the normal complete canonical thing.
580 return urlCanonicalClean(request);
581 }
582
/*
 * Test if a URL is relative.
 *
 * RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
 * appear before a ':'.
 *
 * A NULL or empty url is not relative. Otherwise the url is relative unless
 * a ':' is found before any '/'.
 */
bool
urlIsRelative(const char *url)
{
    if (!url || *url == '\0')
        return false;

    // advance to the first ':' or '/', or to the end of the string
    const char *cursor = url;
    while (*cursor != '\0' && *cursor != ':' && *cursor != '/')
        ++cursor;

    return *cursor != ':';
}
608
609 /*
610 * Convert a relative URL to an absolute URL using the context of a given
611 * request.
612 *
613 * It is assumed that you have already ensured that the URL is relative.
614 *
615 * If NULL is returned it is an indication that the method in use in the
616 * request does not distinguish between relative and absolute and you should
617 * use the url unchanged.
618 *
619 * If non-NULL is returned, it is up to the caller to free the resulting
620 * memory using safe_free().
621 */
622 char *
623 urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
624 {
625
626 if (req->method.id() == Http::METHOD_CONNECT) {
627 return (NULL);
628 }
629
630 char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
631
632 if (req->url.getScheme() == AnyP::PROTO_URN) {
633 // XXX: this is what the original code did, but it seems to break the
634 // intended behaviour of this function. It returns the stored URN path,
635 // not converting the given one into a URN...
636 snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
637 return (urlbuf);
638 }
639
640 SBuf authorityForm = req->url.authority(); // host[:port]
641 const SBuf &scheme = req->url.getScheme().image();
642 size_t urllen = snprintf(urlbuf, MAX_URL, SQUIDSBUFPH "://" SQUIDSBUFPH "%s" SQUIDSBUFPH,
643 SQUIDSBUFPRINT(scheme),
644 SQUIDSBUFPRINT(req->url.userInfo()),
645 !req->url.userInfo().isEmpty() ? "@" : "",
646 SQUIDSBUFPRINT(authorityForm));
647
648 // if the first char is '/' assume its a relative path
649 // XXX: this breaks on scheme-relative URLs,
650 // but we should not see those outside ESI, and rarely there.
651 // XXX: also breaks on any URL containing a '/' in the query-string portion
652 if (relUrl[0] == '/') {
653 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
654 } else {
655 SBuf path = req->url.path();
656 SBuf::size_type lastSlashPos = path.rfind('/');
657
658 if (lastSlashPos == SBuf::npos) {
659 // replace the whole path with the given bit(s)
660 urlbuf[urllen] = '/';
661 ++urllen;
662 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
663 } else {
664 // replace only the last (file?) segment with the given bit(s)
665 ++lastSlashPos;
666 if (lastSlashPos > MAX_URL - urllen - 1) {
667 // XXX: crops bits in the middle of the combined URL.
668 lastSlashPos = MAX_URL - urllen - 1;
669 }
670 SBufToCstring(&urlbuf[urllen], path.substr(0,lastSlashPos));
671 urllen += lastSlashPos;
672 if (urllen + 1 < MAX_URL) {
673 xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
674 }
675 }
676 }
677
678 return (urlbuf);
679 }
680
681 int
682 matchDomainName(const char *h, const char *d, uint flags)
683 {
684 int dl;
685 int hl;
686
687 const bool hostIncludesSubdomains = (*h == '.');
688 while ('.' == *h)
689 ++h;
690
691 hl = strlen(h);
692
693 if (hl == 0)
694 return -1;
695
696 dl = strlen(d);
697
698 /*
699 * Start at the ends of the two strings and work towards the
700 * beginning.
701 */
702 while (xtolower(h[--hl]) == xtolower(d[--dl])) {
703 if (hl == 0 && dl == 0) {
704 /*
705 * We made it all the way to the beginning of both
706 * strings without finding any difference.
707 */
708 return 0;
709 }
710
711 if (0 == hl) {
712 /*
713 * The host string is shorter than the domain string.
714 * There is only one case when this can be a match.
715 * If the domain is just one character longer, and if
716 * that character is a leading '.' then we call it a
717 * match.
718 */
719
720 if (1 == dl && '.' == d[0])
721 return 0;
722 else
723 return -1;
724 }
725
726 if (0 == dl) {
727 /*
728 * The domain string is shorter than the host string.
729 * This is a match only if the first domain character
730 * is a leading '.'.
731 */
732
733 if ('.' == d[0]) {
734 if (flags & mdnRejectSubsubDomains) {
735 // Check for sub-sub domain and reject
736 while(--hl >= 0 && h[hl] != '.');
737 if (hl < 0) {
738 // No sub-sub domain found, but reject if there is a
739 // leading dot in given host string (which is removed
740 // before the check is started).
741 return hostIncludesSubdomains ? 1 : 0;
742 } else
743 return 1; // sub-sub domain, reject
744 } else
745 return 0;
746 } else
747 return 1;
748 }
749 }
750
751 /*
752 * We found different characters in the same position (from the end).
753 */
754
755 // If the h has a form of "*.foo.com" and d has a form of "x.foo.com"
756 // then the h[hl] points to '*', h[hl+1] to '.' and d[dl] to 'x'
757 // The following checks are safe, the "h[hl + 1]" in the worst case is '\0'.
758 if ((flags & mdnHonorWildcards) && h[hl] == '*' && h[hl + 1] == '.')
759 return 0;
760
761 /*
762 * If one of those character is '.' then its special. In order
763 * for splay tree sorting to work properly, "x-foo.com" must
764 * be greater than ".foo.com" even though '-' is less than '.'.
765 */
766 if ('.' == d[dl])
767 return 1;
768
769 if ('.' == h[hl])
770 return -1;
771
772 return (xtolower(h[hl]) - xtolower(d[dl]));
773 }
774
775 /*
776 * return true if we can serve requests for this method.
777 */
778 int
779 urlCheckRequest(const HttpRequest * r)
780 {
781 int rc = 0;
782 /* protocol "independent" methods
783 *
784 * actually these methods are specific to HTTP:
785 * they are methods we recieve on our HTTP port,
786 * and if we had a FTP listener would not be relevant
787 * there.
788 *
789 * So, we should delegate them to HTTP. The problem is that we
790 * do not have a default protocol from the client side of HTTP.
791 */
792
793 if (r->method == Http::METHOD_CONNECT)
794 return 1;
795
796 // we support OPTIONS and TRACE directed at us (with a 501 reply, for now)
797 // we also support forwarding OPTIONS and TRACE, except for the *-URI ones
798 if (r->method == Http::METHOD_OPTIONS || r->method == Http::METHOD_TRACE)
799 return (r->header.getInt64(Http::HdrType::MAX_FORWARDS) == 0 || r->url.path() != URL::Asterisk());
800
801 if (r->method == Http::METHOD_PURGE)
802 return 1;
803
804 /* does method match the protocol? */
805 switch (r->url.getScheme()) {
806
807 case AnyP::PROTO_URN:
808
809 case AnyP::PROTO_HTTP:
810
811 case AnyP::PROTO_CACHE_OBJECT:
812 rc = 1;
813 break;
814
815 case AnyP::PROTO_FTP:
816
817 if (r->method == Http::METHOD_PUT)
818 rc = 1;
819
820 case AnyP::PROTO_GOPHER:
821
822 case AnyP::PROTO_WAIS:
823
824 case AnyP::PROTO_WHOIS:
825 if (r->method == Http::METHOD_GET)
826 rc = 1;
827 else if (r->method == Http::METHOD_HEAD)
828 rc = 1;
829
830 break;
831
832 case AnyP::PROTO_HTTPS:
833 #if USE_OPENSSL
834 rc = 1;
835 #else
836 /*
837 * Squid can't originate an SSL connection, so it should
838 * never receive an "https:" URL. It should always be
839 * CONNECT instead.
840 */
841 rc = 0;
842 #endif
843 break;
844
845 default:
846 break;
847 }
848
849 return rc;
850 }
851
852 /*
853 * Quick-n-dirty host extraction from a URL. Steps:
854 * Look for a colon
855 * Skip any '/' after the colon
856 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
857 * Look for an ending '/' or ':' and terminate
858 * Look for login info preceeded by '@'
859 */
860
861 class URLHostName
862 {
863
864 public:
865 char * extract(char const *url);
866
867 private:
868 static char Host [SQUIDHOSTNAMELEN];
869 void init(char const *);
870 void findHostStart();
871 void trimTrailingChars();
872 void trimAuth();
873 char const *hostStart;
874 char const *url;
875 };
876
877 char *
878 urlHostname(const char *url)
879 {
880 return URLHostName().extract(url);
881 }
882
883 char URLHostName::Host[SQUIDHOSTNAMELEN];
884
885 void
886 URLHostName::init(char const *aUrl)
887 {
888 Host[0] = '\0';
889 url = aUrl;
890 }
891
892 void
893 URLHostName::findHostStart()
894 {
895 if (NULL == (hostStart = strchr(url, ':')))
896 return;
897
898 ++hostStart;
899
900 while (*hostStart != '\0' && *hostStart == '/')
901 ++hostStart;
902
903 if (*hostStart == ']')
904 ++hostStart;
905 }
906
907 void
908 URLHostName::trimTrailingChars()
909 {
910 char *t;
911
912 if ((t = strchr(Host, '/')))
913 *t = '\0';
914
915 if ((t = strrchr(Host, ':')))
916 *t = '\0';
917
918 if ((t = strchr(Host, ']')))
919 *t = '\0';
920 }
921
922 void
923 URLHostName::trimAuth()
924 {
925 char *t;
926
927 if ((t = strrchr(Host, '@'))) {
928 ++t;
929 memmove(Host, t, strlen(t) + 1);
930 }
931 }
932
933 char *
934 URLHostName::extract(char const *aUrl)
935 {
936 init(aUrl);
937 findHostStart();
938
939 if (hostStart == NULL)
940 return NULL;
941
942 xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN);
943
944 trimTrailingChars();
945
946 trimAuth();
947
948 return Host;
949 }
950
951 URL::URL(AnyP::UriScheme const &aScheme) :
952 scheme_(aScheme),
953 hostIsNumeric_(false),
954 port_(0)
955 {
956 *host_=0;
957 }
958