]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Bugzilla 403: url_convert_hex buffer overrun
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * $Id: url.cc,v 1.135 2002/08/19 22:47:54 hno Exp $
4 *
5 * DEBUG: section 23 URL Parsing
6 * AUTHOR: Duane Wessels
7 *
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "squid.h"
37
/*
 * Printable names of the request methods.  The table is indexed with
 * method_t values, so the order of the entries must not change.
 */
const char *RequestMethodStr[] =
{
    "NONE",
    /* built-in method names */
    "GET", "POST", "PUT", "HEAD", "CONNECT",
    "TRACE", "PURGE", "OPTIONS", "DELETE",
    "PROPFIND", "PROPPATCH", "MKCOL", "COPY", "MOVE",
    "LOCK", "UNLOCK",
    "BMOVE", "BDELETE", "BPROPFIND", "BPROPPATCH", "BCOPY",
    "SEARCH", "SUBSCRIBE", "UNSUBSCRIBE", "POLL",
    /*
     * Placeholder slots; urlExtMethodAdd() overwrites them with
     * configured extension method names at run time.
     */
    "%EXT00", "%EXT01", "%EXT02", "%EXT03", "%EXT04",
    "%EXT05", "%EXT06", "%EXT07", "%EXT08", "%EXT09",
    "%EXT10", "%EXT11", "%EXT12", "%EXT13", "%EXT14",
    "%EXT15", "%EXT16", "%EXT17", "%EXT18", "%EXT19",
    /* final entry */
    "ERROR"
};
88
/*
 * Printable protocol (URL scheme) names.  The table is indexed with
 * protocol_t values; urlInitialize() asserts that it holds exactly
 * PROTO_MAX + 1 entries.
 */
const char *ProtocolStr[] =
{
    "NONE",
    "http", "ftp", "gopher", "wais",
    "cache_object",
    "icp",
#if USE_HTCP
    "htcp",
#endif
    "urn", "whois", "internal", "https",
    "TOTAL"
};
107
108 static request_t *urnParse(method_t method, char *urn);
/*
 * Every character urlParse() accepts inside a hostname.  The underscore
 * is only part of the set when ALLOW_HOSTNAME_UNDERSCORES is enabled.
 */
static const char *const valid_hostname_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
#if ALLOW_HOSTNAME_UNDERSCORES
    "_"
#endif
    ;
119
/*
 * Numeric value of one hexadecimal digit, or -1 when c is not a hex digit.
 */
static int
url_hex_digit(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/*
 * Convert every %xx escape in a URL string to the character it encodes.
 *
 * org_url   the string to decode
 * allocate  non-zero: decode a copy obtained from xstrdup() and return it
 *           zero:     decode org_url in place and return org_url
 *
 * Only a '%' followed by two hexadecimal digits is decoded.  Any other
 * '%' (too close to the end of the string, or followed by non-hex
 * characters) is copied through unchanged instead of being turned into
 * an arbitrary byte.
 *
 * NOTE: "%00" decodes to a NUL byte, so C string functions will see the
 * result as ending there even though the tail is still written.
 */
char *
url_convert_hex(char *org_url, int allocate)
{
    char *url = allocate ? (char *) xstrdup(org_url) : org_url;
    char *s = NULL;
    char *t = NULL;
    /* nothing to do unless there is room for, and a sign of, an escape */
    if ((int) strlen(url) < 3 || !strchr(url, '%'))
        return url;
    /* t (write) never overtakes s (read), so decoding in place is safe */
    for (s = t = url; *s; s++) {
        int hi = -1;
        int lo = -1;
        /*
         * The && chain stops at the first non-hex character, which
         * includes the terminating NUL, so s[2] is only read when s[1]
         * is a real digit and we never look past the end of the string.
         */
        if (*s == '%'
            && (hi = url_hex_digit(s[1])) >= 0
            && (lo = url_hex_digit(s[2])) >= 0) {
            *t++ = (char) (hi * 16 + lo);
            s += 2;
        } else {
            *t++ = *s;
        }
    }
    *t = '\0';
    return url;
}
147
148 void
149 urlInitialize(void)
150 {
151 debug(23, 5) ("urlInitialize: Initializing...\n");
152 assert(sizeof(ProtocolStr) == (PROTO_MAX + 1) * sizeof(char *));
153 memset(&null_request_flags, '\0', sizeof(null_request_flags));
154 /*
155 * These test that our matchDomainName() function works the
156 * way we expect it to.
157 */
158 assert(0 == matchDomainName("foo.com", "foo.com"));
159 assert(0 == matchDomainName(".foo.com", "foo.com"));
160 assert(0 == matchDomainName("foo.com", ".foo.com"));
161 assert(0 == matchDomainName(".foo.com", ".foo.com"));
162 assert(0 == matchDomainName("x.foo.com", ".foo.com"));
163 assert(0 != matchDomainName("x.foo.com", "foo.com"));
164 assert(0 != matchDomainName("foo.com", "x.foo.com"));
165 assert(0 != matchDomainName("bar.com", "foo.com"));
166 assert(0 != matchDomainName(".bar.com", "foo.com"));
167 assert(0 != matchDomainName(".bar.com", ".foo.com"));
168 assert(0 != matchDomainName("bar.com", ".foo.com"));
169 assert(0 < matchDomainName("zzz.com", "foo.com"));
170 assert(0 > matchDomainName("aaa.com", "foo.com"));
171 assert(0 == matchDomainName("FOO.com", "foo.COM"));
172 assert(0 < matchDomainName("bfoo.com", "afoo.com"));
173 assert(0 > matchDomainName("afoo.com", "bfoo.com"));
174 assert(0 < matchDomainName("x-foo.com", ".foo.com"));
175 /* more cases? */
176 }
177
178 method_t
179 urlParseMethod(const char *s)
180 {
181 method_t method = METHOD_NONE;
182 /*
183 * This check for '%' makes sure that we don't
184 * match one of the extension method placeholders,
185 * which have the form %EXT[0-9][0-9]
186 */
187 if (*s == '%')
188 return METHOD_NONE;
189 for (method++; method < METHOD_ENUM_END; method++) {
190 if (0 == strcasecmp(s, RequestMethodStr[method]))
191 return method;
192 }
193 return METHOD_NONE;
194 }
195
196
197 protocol_t
198 urlParseProtocol(const char *s)
199 {
200 /* test common stuff first */
201 if (strcasecmp(s, "http") == 0)
202 return PROTO_HTTP;
203 if (strcasecmp(s, "ftp") == 0)
204 return PROTO_FTP;
205 if (strcasecmp(s, "https") == 0)
206 return PROTO_HTTPS;
207 if (strcasecmp(s, "file") == 0)
208 return PROTO_FTP;
209 if (strcasecmp(s, "gopher") == 0)
210 return PROTO_GOPHER;
211 if (strcasecmp(s, "wais") == 0)
212 return PROTO_WAIS;
213 if (strcasecmp(s, "cache_object") == 0)
214 return PROTO_CACHEOBJ;
215 if (strcasecmp(s, "urn") == 0)
216 return PROTO_URN;
217 if (strcasecmp(s, "whois") == 0)
218 return PROTO_WHOIS;
219 if (strcasecmp(s, "internal") == 0)
220 return PROTO_INTERNAL;
221 return PROTO_NONE;
222 }
223
224
225 int
226 urlDefaultPort(protocol_t p)
227 {
228 switch (p) {
229 case PROTO_HTTP:
230 return 80;
231 case PROTO_HTTPS:
232 return 443;
233 case PROTO_FTP:
234 return 21;
235 case PROTO_GOPHER:
236 return 70;
237 case PROTO_WAIS:
238 return 210;
239 case PROTO_CACHEOBJ:
240 case PROTO_INTERNAL:
241 return CACHE_HTTP_PORT;
242 case PROTO_WHOIS:
243 return 43;
244 default:
245 return 0;
246 }
247 }
248
249 request_t *
250 urlParse(method_t method, char *url)
251 {
252 LOCAL_ARRAY(char, proto, MAX_URL);
253 LOCAL_ARRAY(char, login, MAX_URL);
254 LOCAL_ARRAY(char, host, MAX_URL);
255 LOCAL_ARRAY(char, urlpath, MAX_URL);
256 request_t *request = NULL;
257 char *t = NULL;
258 char *q = NULL;
259 int port;
260 protocol_t protocol = PROTO_NONE;
261 int l;
262 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
263
264 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
265 /* terminate so it doesn't overflow other buffers */
266 *(url + (MAX_URL >> 1)) = '\0';
267 debug(23, 1) ("urlParse: URL too large (%d bytes)\n", l);
268 return NULL;
269 }
270 if (method == METHOD_CONNECT) {
271 port = CONNECT_PORT;
272 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
273 return NULL;
274 } else if (!strncmp(url, "urn:", 4)) {
275 return urnParse(method, url);
276 } else {
277 if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
278 return NULL;
279 protocol = urlParseProtocol(proto);
280 port = urlDefaultPort(protocol);
281 /* Is there any login informaiton? */
282 if ((t = strrchr(host, '@'))) {
283 strcpy((char *) login, (char *) host);
284 t = strrchr(login, '@');
285 *t = 0;
286 strcpy((char *) host, t + 1);
287 }
288 if ((t = strrchr(host, ':'))) {
289 *t++ = '\0';
290 if (*t != '\0')
291 port = atoi(t);
292 }
293 }
294 for (t = host; *t; t++)
295 *t = xtolower(*t);
296 if (stringHasWhitespace(host)) {
297 if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
298 t = q = host;
299 while (*t) {
300 if (!xisspace(*t))
301 *q++ = *t;
302 t++;
303 }
304 *q = '\0';
305 }
306 }
307 if (strspn(host, valid_hostname_chars) != strlen(host)) {
308 debug(23, 1) ("urlParse: Illegal character in hostname '%s'\n", host);
309 return NULL;
310 }
311 /* remove trailing dots from hostnames */
312 while ((l = strlen(host)) > 0 && host[--l] == '.')
313 host[l] = '\0';
314 /* remove duplicate dots */
315 while ((t = strstr(host, "..")))
316 xmemmove(t, t + 1, strlen(t));
317 if (Config.appendDomain && !strchr(host, '.'))
318 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN);
319 if (port == 0) {
320 debug(23, 3) ("urlParse: Invalid port == 0\n");
321 return NULL;
322 }
323 #ifdef HARDCODE_DENY_PORTS
324 /* These ports are filtered in the default squid.conf, but
325 * maybe someone wants them hardcoded... */
326 if (port == 7 || port == 9 || port == 19) {
327 debug(23, 0) ("urlParse: Deny access to port %d\n", port);
328 return NULL;
329 }
330 #endif
331 if (stringHasWhitespace(urlpath)) {
332 debug(23, 2) ("urlParse: URI has whitespace: {%s}\n", url);
333 switch (Config.uri_whitespace) {
334 case URI_WHITESPACE_DENY:
335 return NULL;
336 case URI_WHITESPACE_ALLOW:
337 break;
338 case URI_WHITESPACE_ENCODE:
339 t = rfc1738_escape_unescaped(urlpath);
340 xstrncpy(urlpath, t, MAX_URL);
341 break;
342 case URI_WHITESPACE_CHOP:
343 *(urlpath + strcspn(urlpath, w_space)) = '\0';
344 break;
345 case URI_WHITESPACE_STRIP:
346 default:
347 t = q = urlpath;
348 while (*t) {
349 if (!xisspace(*t))
350 *q++ = *t;
351 t++;
352 }
353 *q = '\0';
354 }
355 }
356 request = requestCreate(method, protocol, urlpath);
357 xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
358 xstrncpy(request->login, login, MAX_LOGIN_SZ);
359 request->port = (u_short) port;
360 return request;
361 }
362
363 static request_t *
364 urnParse(method_t method, char *urn)
365 {
366 debug(50, 5) ("urnParse: %s\n", urn);
367 return requestCreate(method, PROTO_URN, urn + 4);
368 }
369
370 const char *
371 urlCanonical(request_t * request)
372 {
373 LOCAL_ARRAY(char, portbuf, 32);
374 LOCAL_ARRAY(char, urlbuf, MAX_URL);
375 if (request->canonical)
376 return request->canonical;
377 if (request->protocol == PROTO_URN) {
378 snprintf(urlbuf, MAX_URL, "urn:%s", strBuf(request->urlpath));
379 } else {
380 switch (request->method) {
381 case METHOD_CONNECT:
382 snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
383 break;
384 default:
385 portbuf[0] = '\0';
386 if (request->port != urlDefaultPort(request->protocol))
387 snprintf(portbuf, 32, ":%d", request->port);
388 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
389 ProtocolStr[request->protocol],
390 request->login,
391 *request->login ? "@" : null_string,
392 request->host,
393 portbuf,
394 strBuf(request->urlpath));
395 break;
396 }
397 }
398 return (request->canonical = xstrdup(urlbuf));
399 }
400
401 char *
402 urlCanonicalClean(const request_t * request)
403 {
404 LOCAL_ARRAY(char, buf, MAX_URL);
405 LOCAL_ARRAY(char, portbuf, 32);
406 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
407 char *t;
408 if (request->protocol == PROTO_URN) {
409 snprintf(buf, MAX_URL, "urn:%s", strBuf(request->urlpath));
410 } else {
411 switch (request->method) {
412 case METHOD_CONNECT:
413 snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
414 break;
415 default:
416 portbuf[0] = '\0';
417 if (request->port != urlDefaultPort(request->protocol))
418 snprintf(portbuf, 32, ":%d", request->port);
419 loginbuf[0] = '\0';
420 if ((int) strlen(request->login) > 0) {
421 strcpy(loginbuf, request->login);
422 if ((t = strchr(loginbuf, ':')))
423 *t = '\0';
424 strcat(loginbuf, "@");
425 }
426 snprintf(buf, MAX_URL, "%s://%s%s%s%s",
427 ProtocolStr[request->protocol],
428 loginbuf,
429 request->host,
430 portbuf,
431 strBuf(request->urlpath));
432 /*
433 * strip arguments AFTER a question-mark
434 */
435 if (Config.onoff.strip_query_terms)
436 if ((t = strchr(buf, '?')))
437 *(++t) = '\0';
438 break;
439 }
440 }
441 if (stringHasCntl(buf))
442 xstrncpy(buf, rfc1738_escape_unescaped(buf), MAX_URL);
443 return buf;
444 }
445
/*
 * matchDomainName() compares a hostname with a domainname according
 * to the following rules:
 *
 *    HOST          DOMAIN        MATCH?
 * ------------- -------------    ------
 *    foo.com       foo.com         YES
 *   .foo.com       foo.com         YES
 *  x.foo.com       foo.com          NO
 *    foo.com      .foo.com         YES
 *   .foo.com      .foo.com         YES
 *  x.foo.com      .foo.com         YES
 *
 * We strip leading dots on hosts (but not domains!) so that
 * ".foo.com" is always the same as "foo.com".  The comparison is
 * case-insensitive.
 *
 * Return values:
 *       0 means the host matches the domain
 *     > 0 means the host is greater than the domain
 *     < 0 means the host is less than the domain
 *
 * Only the sign of a non-zero result is meaningful.  A host that is
 * empty once its leading dots are stripped is less than every domain,
 * and a non-empty host is greater than an empty domain.
 */

int
matchDomainName(const char *h, const char *d)
{
    int dl;
    int hl;
    while ('.' == *h)
        h++;
    hl = strlen(h);
    /*
     * The loop below pre-decrements both lengths before indexing, so
     * an empty string on either side would be read at index -1.
     */
    if (0 == hl)
        return -1;
    dl = strlen(d);
    if (0 == dl)
        return 1;
    /*
     * Start at the ends of the two strings and work towards the
     * beginning.
     */
    while (xtolower(h[--hl]) == xtolower(d[--dl])) {
        if (hl == 0 && dl == 0) {
            /*
             * We made it all the way to the beginning of both
             * strings without finding any difference.
             */
            return 0;
        }
        if (0 == hl) {
            /*
             * The host string is shorter than the domain string.
             * There is only one case when this can be a match.
             * If the domain is just one character longer, and if
             * that character is a leading '.' then we call it a
             * match.
             */
            if (1 == dl && '.' == d[0])
                return 0;
            else
                return -1;
        }
        if (0 == dl) {
            /*
             * The domain string is shorter than the host string.
             * This is a match only if the first domain character
             * is a leading '.'.
             */
            if ('.' == d[0])
                return 0;
            else
                return 1;
        }
    }
    /*
     * We found different characters in the same position (from the end).
     */
    /*
     * If one of those characters is '.' then it's special.  In order
     * for splay tree sorting to work properly, "x-foo.com" must
     * be greater than ".foo.com" even though '-' is less than '.'.
     */
    if ('.' == d[dl])
        return 1;
    if ('.' == h[hl])
        return -1;
    return (xtolower(h[hl]) - xtolower(d[dl]));
}
528
529 int
530 urlCheckRequest(const request_t * r)
531 {
532 int rc = 0;
533 /* protocol "independent" methods */
534 if (r->method == METHOD_CONNECT)
535 return 1;
536 if (r->method == METHOD_TRACE)
537 return 1;
538 if (r->method == METHOD_PURGE)
539 return 1;
540 /* does method match the protocol? */
541 switch (r->protocol) {
542 case PROTO_URN:
543 case PROTO_HTTP:
544 case PROTO_CACHEOBJ:
545 rc = 1;
546 break;
547 case PROTO_FTP:
548 if (r->method == METHOD_PUT)
549 rc = 1;
550 case PROTO_GOPHER:
551 case PROTO_WAIS:
552 case PROTO_WHOIS:
553 if (r->method == METHOD_GET)
554 rc = 1;
555 else if (r->method == METHOD_HEAD)
556 rc = 1;
557 break;
558 case PROTO_HTTPS:
559 #ifdef USE_SSL
560 rc = 1;
561 break;
562 #else
563 /*
564 * Squid can't originate an SSL connection, so it should
565 * never receive an "https:" URL. It should always be
566 * CONNECT instead.
567 */
568 rc = 0;
569 #endif
570 default:
571 break;
572 }
573 return rc;
574 }
575
576 /*
577 * Quick-n-dirty host extraction from a URL. Steps:
578 * Look for a colon
579 * Skip any '/' after the colon
580 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
581 * Look for an ending '/' or ':' and terminate
582 * Look for login info preceeded by '@'
583 */
584 char *
585 urlHostname(const char *url)
586 {
587 LOCAL_ARRAY(char, host, SQUIDHOSTNAMELEN);
588 char *t;
589 host[0] = '\0';
590 if (NULL == (t = strchr(url, ':')))
591 return NULL;
592 t++;
593 while (*t != '\0' && *t == '/')
594 t++;
595 xstrncpy(host, t, SQUIDHOSTNAMELEN);
596 if ((t = strchr(host, '/')))
597 *t = '\0';
598 if ((t = strchr(host, ':')))
599 *t = '\0';
600 if ((t = strrchr(host, '@'))) {
601 t++;
602 xmemmove(host, t, strlen(t) + 1);
603 }
604 return host;
605 }
606
607 static void
608 urlExtMethodAdd(const char *mstr)
609 {
610 method_t method = 0;
611 for (method++; method < METHOD_ENUM_END; method++) {
612 if (0 == strcmp(mstr, RequestMethodStr[method])) {
613 debug(23, 2) ("Extension method '%s' already exists\n", mstr);
614 return;
615 }
616 if (0 != strncmp("%EXT", RequestMethodStr[method], 4))
617 continue;
618 /* Don't free statically allocated "%EXTnn" string */
619 RequestMethodStr[method] = xstrdup(mstr);
620 debug(23, 1) ("Extension method '%s' added, enum=%d\n", mstr, (int) method);
621 return;
622 }
623 debug(23, 1) ("WARNING: Could not add new extension method '%s' due to lack of array space\n", mstr);
624 }
625
626 void
627 urlExtMethodConfigure(void)
628 {
629 wordlist *w = Config.ext_methods;
630 while (w) {
631 char *s;
632 for (s = w->key; *s; s++)
633 *s = xtoupper(*s);
634 urlExtMethodAdd(w->key);
635 w = w->next;
636 }
637 }