]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
Allow whitespace in request URIs.
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * $Id: url.cc,v 1.109 1998/10/14 21:12:04 wessels Exp $
4 *
5 * DEBUG: section 23 URL Parsing
6 * AUTHOR: Duane Wessels
7 *
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * Duane Wessels and the University of California San Diego. Please
16 * see the COPYRIGHT file for full details. Squid incorporates
17 * software developed and/or copyrighted by other sources. Please see
18 * the CREDITS file for full details.
19 *
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
33 *
34 */
35
36 #include "squid.h"
37
/*
 * Printable names of the request methods known to this file.
 * NOTE(review): presumably indexed by method_t, so the order must follow
 * the enum declaration -- confirm against the header before reordering.
 */
const char *RequestMethodStr[] = {
    "NONE",                     /* 0 */
    "GET",                      /* 1 */
    "POST",                     /* 2 */
    "PUT",                      /* 3 */
    "HEAD",                     /* 4 */
    "CONNECT",                  /* 5 */
    "TRACE",                    /* 6 */
    "PURGE"                     /* 7 */
};
49
/*
 * Scheme names, indexed by protocol_t (urlCanonical() and
 * urlCanonicalClean() look entries up with request->protocol).
 * urlInitialize() asserts that the table holds PROTO_MAX + 1 entries, so
 * any change here must be mirrored in the protocol_t enum.
 */
const char *ProtocolStr[] = {
    "NONE",
    "http",
    "ftp",
    "gopher",
    "wais",
    "cache_object",
    "icp",
#if USE_HTCP
    "htcp",                     /* only present in HTCP-enabled builds */
#endif
    "urn",
    "whois",
    "internal",
    "https",
    "TOTAL"                     /* trailing sentinel entry */
};
68
69 static const char *const hex = "0123456789abcdef";
70 static request_t *urnParse(method_t method, char *urn);
/*
 * Every character urlParse() accepts in a host name: ASCII letters,
 * digits, '-' and '.'.  Builds with ALLOW_HOSTNAME_UNDERSCORES set also
 * accept '_'.
 */
static const char *const valid_hostname_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
#if ALLOW_HOSTNAME_UNDERSCORES
    "_"
#endif
    ;
81
/*
 * Decode %xx escapes in a URL string.
 *
 * org_url   NUL-terminated string to decode.
 * allocate  non-zero: work on (and return) a fresh xstrdup() copy of
 *           org_url; zero: decode org_url in place and return it.
 *
 * Every '%' that is followed by at least two more characters is replaced
 * by the byte strtol() yields for those two characters in base 16 (so an
 * escape with no leading hex digit decodes to a NUL byte).  A '%' with
 * fewer than two characters after it is copied unchanged.  The decoded
 * string is never longer than the input.
 */
char *
url_convert_hex(char *org_url, int allocate)
{
    char code[3];
    char *url = allocate ? (char *) xstrdup(org_url) : org_url;
    char *s;
    char *t;
    if (strlen(url) < 3 || !strchr(url, '%'))
        return url;
    code[2] = '\0';
    s = t = url;
    while (*s != '\0') {
        /*
         * Look at s[1] and s[2] only once we know s[0] is not the
         * terminator, and check s[1] before s[2].  The previous loop
         * tested *(s + 2) after s had already been moved past a decoded
         * escape, which read beyond the end of the string whenever an
         * escape sat within two characters of the end.
         */
        if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') {
            code[0] = s[1];
            code[1] = s[2];
            *t++ = (char) strtol(code, NULL, 16);
            s += 3;
        } else {
            *t++ = *s++;
        }
    }
    *t = '\0';
    return url;
}
109
110 void
111 urlInitialize(void)
112 {
113 debug(23, 5) ("urlInitialize: Initializing...\n");
114 assert(sizeof(ProtocolStr) == (PROTO_MAX + 1) * sizeof(char *));
115 memset(&null_request_flags, '\0', sizeof(null_request_flags));
116 }
117
118 method_t
119 urlParseMethod(const char *s)
120 {
121 if (strcasecmp(s, "GET") == 0) {
122 return METHOD_GET;
123 } else if (strcasecmp(s, "POST") == 0) {
124 return METHOD_POST;
125 } else if (strcasecmp(s, "PUT") == 0) {
126 return METHOD_PUT;
127 } else if (strcasecmp(s, "HEAD") == 0) {
128 return METHOD_HEAD;
129 } else if (strcasecmp(s, "CONNECT") == 0) {
130 return METHOD_CONNECT;
131 } else if (strcasecmp(s, "TRACE") == 0) {
132 return METHOD_TRACE;
133 } else if (strcasecmp(s, "PURGE") == 0) {
134 return METHOD_PURGE;
135 }
136 return METHOD_NONE;
137 }
138
139
140 protocol_t
141 urlParseProtocol(const char *s)
142 {
143 /* test common stuff first */
144 if (strcasecmp(s, "http") == 0)
145 return PROTO_HTTP;
146 if (strcasecmp(s, "ftp") == 0)
147 return PROTO_FTP;
148 if (strcasecmp(s, "https") == 0)
149 return PROTO_HTTPS;
150 if (strcasecmp(s, "file") == 0)
151 return PROTO_FTP;
152 if (strcasecmp(s, "gopher") == 0)
153 return PROTO_GOPHER;
154 if (strcasecmp(s, "wais") == 0)
155 return PROTO_WAIS;
156 if (strcasecmp(s, "cache_object") == 0)
157 return PROTO_CACHEOBJ;
158 if (strcasecmp(s, "urn") == 0)
159 return PROTO_URN;
160 if (strcasecmp(s, "whois") == 0)
161 return PROTO_WHOIS;
162 if (strcasecmp(s, "internal") == 0)
163 return PROTO_INTERNAL;
164 return PROTO_NONE;
165 }
166
167
168 int
169 urlDefaultPort(protocol_t p)
170 {
171 switch (p) {
172 case PROTO_HTTP:
173 return 80;
174 case PROTO_HTTPS:
175 return 443;
176 case PROTO_FTP:
177 return 21;
178 case PROTO_GOPHER:
179 return 70;
180 case PROTO_WAIS:
181 return 210;
182 case PROTO_CACHEOBJ:
183 case PROTO_INTERNAL:
184 return CACHE_HTTP_PORT;
185 case PROTO_WHOIS:
186 return 43;
187 default:
188 return 0;
189 }
190 }
191
192 request_t *
193 urlParse(method_t method, char *url)
194 {
195 LOCAL_ARRAY(char, proto, MAX_URL);
196 LOCAL_ARRAY(char, login, MAX_URL);
197 LOCAL_ARRAY(char, host, MAX_URL);
198 LOCAL_ARRAY(char, urlpath, MAX_URL);
199 request_t *request = NULL;
200 char *t = NULL;
201 int port;
202 protocol_t protocol = PROTO_NONE;
203 int l;
204 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
205
206 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
207 /* terminate so it doesn't overflow other buffers */
208 *(url + (MAX_URL >> 1)) = '\0';
209 debug(23, 1) ("urlParse: URL too large (%d bytes)\n", l);
210 return NULL;
211 }
212 if (method == METHOD_CONNECT) {
213 port = CONNECT_PORT;
214 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
215 return NULL;
216 } else if (!strncmp(url, "urn:", 4)) {
217 return urnParse(method, url);
218 } else {
219 if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
220 return NULL;
221 protocol = urlParseProtocol(proto);
222 port = urlDefaultPort(protocol);
223 /* Is there any login informaiton? */
224 if ((t = strrchr(host, '@'))) {
225 strcpy(login, host);
226 t = strrchr(login, '@');
227 *t = 0;
228 strcpy(host, t + 1);
229 }
230 if ((t = strrchr(host, ':'))) {
231 *t++ = '\0';
232 if (*t != '\0')
233 port = atoi(t);
234 }
235 }
236 for (t = host; *t; t++)
237 *t = tolower(*t);
238 if (strspn(host, valid_hostname_chars) != strlen(host)) {
239 debug(23, 1) ("urlParse: Illegal character in hostname '%s'\n", host);
240 return NULL;
241 }
242 /* remove trailing dots from hostnames */
243 while ((l = strlen(host)) > 0 && host[--l] == '.')
244 host[l] = '\0';
245 if (Config.appendDomain && !strchr(host, '.'))
246 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN);
247 if (port == 0) {
248 debug(23, 3) ("urlParse: Invalid port == 0\n");
249 return NULL;
250 }
251 #ifdef HARDCODE_DENY_PORTS
252 /* These ports are filtered in the default squid.conf, but
253 * maybe someone wants them hardcoded... */
254 if (port == 7 || port == 9 || port = 19) {
255 debug(23, 0) ("urlParse: Deny access to port %d\n", port);
256 return NULL;
257 }
258 #endif
259 if (stringHasWhitespace(urlpath)) {
260 debug(23, 1) ("urlParse: URI has whitespace: {%s}\n", url);
261 switch (Config.uri_whitespace) {
262 case URI_WHITESPACE_DENY:
263 return NULL;
264 case URI_WHITESPACE_ALLOW:
265 break;
266 case URI_WHITESPACE_ENCODE:
267 t = rfc1738_escape(urlpath);
268 xstrncpy(urlpath, t, MAX_URL);
269 break;
270 case URI_WHITESPACE_CHOP:
271 *(urlpath + strcspn(urlpath, w_space)) = '\0';
272 break;
273 }
274 }
275 request = requestCreate(method, protocol, urlpath);
276 xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
277 xstrncpy(request->login, login, MAX_LOGIN_SZ);
278 request->port = (u_short) port;
279 return request;
280 }
281
282 static request_t *
283 urnParse(method_t method, char *urn)
284 {
285 debug(50, 5) ("urnParse: %s\n", urn);
286 return requestCreate(method, PROTO_URN, urn + 4);
287 }
288
289 const char *
290 urlCanonical(request_t * request)
291 {
292 LOCAL_ARRAY(char, portbuf, 32);
293 LOCAL_ARRAY(char, urlbuf, MAX_URL);
294 if (request->canonical)
295 return request->canonical;
296 if (request->protocol == PROTO_URN) {
297 snprintf(urlbuf, MAX_URL, "urn:%s", strBuf(request->urlpath));
298 } else {
299 switch (request->method) {
300 case METHOD_CONNECT:
301 snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
302 break;
303 default:
304 portbuf[0] = '\0';
305 if (request->port != urlDefaultPort(request->protocol))
306 snprintf(portbuf, 32, ":%d", request->port);
307 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
308 ProtocolStr[request->protocol],
309 request->login,
310 *request->login ? "@" : null_string,
311 request->host,
312 portbuf,
313 strBuf(request->urlpath));
314 break;
315 }
316 }
317 return (request->canonical = xstrdup(urlbuf));
318 }
319
320 char *
321 urlCanonicalClean(const request_t * request)
322 {
323 LOCAL_ARRAY(char, buf, MAX_URL);
324 LOCAL_ARRAY(char, portbuf, 32);
325 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
326 char *t;
327 if (request->protocol == PROTO_URN) {
328 snprintf(buf, MAX_URL, "urn:%s", strBuf(request->urlpath));
329 } else {
330 switch (request->method) {
331 case METHOD_CONNECT:
332 snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
333 break;
334 default:
335 portbuf[0] = '\0';
336 if (request->port != urlDefaultPort(request->protocol))
337 snprintf(portbuf, 32, ":%d", request->port);
338 loginbuf[0] = '\0';
339 if ((int) strlen(request->login) > 0) {
340 strcpy(loginbuf, request->login);
341 if ((t = strchr(loginbuf, ':')))
342 *t = '\0';
343 strcat(loginbuf, "@");
344 }
345 snprintf(buf, MAX_URL, "%s://%s%s%s%s",
346 ProtocolStr[request->protocol],
347 loginbuf,
348 request->host,
349 portbuf,
350 strBuf(request->urlpath));
351 /*
352 * strip arguments AFTER a question-mark
353 */
354 if ((t = strchr(buf, '?')))
355 *(++t) = '\0';
356 break;
357 }
358 }
359 if (stringHasWhitespace(buf))
360 xstrncpy(buf, rfc1738_escape(buf), MAX_URL);
361 return buf;
362 }
363
/*
 * Test whether host belongs to domain (case-insensitive).
 *
 * Returns 1 when host ends with domain and either
 *   - domain itself begins with '.', or
 *   - host and domain have the same length, or
 *   - the character in host just before the matched suffix is '.';
 * returns 0 otherwise, including when host is shorter than domain.
 */
int
matchDomainName(const char *domain, const char *host)
{
    const int offset = strlen(host) - strlen(domain);
    const char *suffix;

    if (offset < 0)
        return 0;               /* host too short */
    suffix = host + offset;
    if (strcasecmp(domain, suffix) != 0)
        return 0;               /* no match at all */
    /* suffix[-1] is only inspected when offset > 0 (short-circuit) */
    return *domain == '.' || offset == 0 || suffix[-1] == '.';
}
380
381 int
382 urlCheckRequest(const request_t * r)
383 {
384 int rc = 0;
385 /* protocol "independent" methods */
386 if (r->method == METHOD_CONNECT)
387 return 1;
388 if (r->method == METHOD_TRACE)
389 return 1;
390 if (r->method == METHOD_PURGE)
391 return 1;
392 /* does method match the protocol? */
393 switch (r->protocol) {
394 case PROTO_URN:
395 case PROTO_HTTP:
396 case PROTO_HTTPS:
397 case PROTO_CACHEOBJ:
398 rc = 1;
399 break;
400 case PROTO_FTP:
401 if (r->method == METHOD_PUT)
402 rc = 1;
403 case PROTO_GOPHER:
404 case PROTO_WAIS:
405 case PROTO_WHOIS:
406 if (r->method == METHOD_GET)
407 rc = 1;
408 else if (r->method == METHOD_HEAD)
409 rc = 1;
410 break;
411 default:
412 break;
413 }
414 return rc;
415 }
416
417 /*
418 * Quick-n-dirty host extraction from a URL. Steps:
419 * Look for a colon
420 * Skip any '/' after the colon
421 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
422 * Look for an ending '/' or ':' and terminate
423 * Look for login info preceeded by '@'
424 */
425 char *
426 urlHostname(const char *url)
427 {
428 LOCAL_ARRAY(char, host, SQUIDHOSTNAMELEN);
429 char *t;
430 host[0] = '\0';
431 if (NULL == (t = strchr(url, ':')))
432 return NULL;
433 t++;
434 while (*t != '\0' && *t == '/')
435 t++;
436 xstrncpy(host, t, SQUIDHOSTNAMELEN);
437 if ((t = strchr(host, '/')))
438 *t = '\0';
439 if ((t = strchr(host, ':')))
440 *t = '\0';
441 if ((t = strrchr(host, '@'))) {
442 t++;
443 xmemmove(host, t, strlen(t) + 1);
444 }
445 return host;
446 }