]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
branch 2.2 merge
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * $Id: url.cc,v 1.115 1999/04/23 02:57:41 wessels Exp $
4 * $Id: url.cc,v 1.115 1999/04/23 02:57:41 wessels Exp $
5 *
6 * DEBUG: section 23 URL Parsing
7 * AUTHOR: Duane Wessels
8 *
9 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
10 * ----------------------------------------------------------
11 *
12 * Squid is the result of efforts by numerous individuals from the
13 * Internet community. Development is led by Duane Wessels of the
14 * National Laboratory for Applied Network Research and funded by the
15 * National Science Foundation. Squid is Copyrighted (C) 1998 by
16 * Duane Wessels and the University of California San Diego. Please
17 * see the COPYRIGHT file for full details. Squid incorporates
18 * software developed and/or copyrighted by other sources. Please see
19 * the CREDITS file for full details.
20 *
21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful,
27 * but WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 * GNU General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
34 *
35 */
36
37 #include "squid.h"
38
/*
 * Printable names of the request methods.  The initializers are
 * positional: slot 0 is "NONE" and slot 7 is "PURGE".
 * NOTE(review): presumably indexed by method_t -- confirm the order
 * against the enum declaration.
 */
const char *RequestMethodStr[] = {
    "NONE", "GET", "POST", "PUT",
    "HEAD", "CONNECT", "TRACE", "PURGE"
};
50
/*
 * Printable protocol (URL scheme) names.  urlCanonical() and
 * urlCanonicalClean() index this table with request->protocol, and
 * urlInitialize() asserts that it holds exactly PROTO_MAX + 1 entries.
 * The "htcp" slot only exists when USE_HTCP is enabled, which shifts
 * every later entry by one.
 */
const char *ProtocolStr[] = {
    "NONE",
    "http",
    "ftp",
    "gopher",
    "wais",
    "cache_object",
    "icp",
#if USE_HTCP
    "htcp",
#endif
    "urn",
    "whois",
    "internal",
    "https",
    "TOTAL"			/* trailing sentinel entry */
};
69
70 static request_t *urnParse(method_t method, char *urn);
/*
 * Characters accepted in a hostname by urlParse().  The underscore is
 * only accepted when ALLOW_HOSTNAME_UNDERSCORES is enabled.
 */
static const char *const valid_hostname_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-."
#if ALLOW_HOSTNAME_UNDERSCORES
    "_"
#endif
    ;
81
/*
 * Value of a single hexadecimal digit character, or -1 if `c' is not
 * a hex digit.
 */
static int
url_hex_digit_value(int c)
{
    if (c >= '0' && c <= '9')
	return c - '0';
    if (c >= 'a' && c <= 'f')
	return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
	return c - 'A' + 10;
    return -1;
}

/*
 * url_convert_hex
 *
 * Convert every %xx escape in a URL string to the character it encodes.
 *
 *   org_url   NUL-terminated string to decode.
 *   allocate  non-zero: decode a copy obtained from xstrdup() and leave
 *             org_url untouched (the caller owns the returned copy);
 *             zero: decode org_url in place.
 *
 * Returns a pointer to the decoded string (the copy, or org_url itself).
 *
 * A '%' that is not followed by two hexadecimal digits is copied through
 * unchanged.  The scan stops at the terminating NUL and never looks
 * beyond it, even when an escape is the last thing in the string.
 * Note that "%00" decodes to a NUL byte, which ends the string as far
 * as ordinary C string consumers are concerned.
 */
char *
url_convert_hex(char *org_url, int allocate)
{
    char *url = allocate ? (char *) xstrdup(org_url) : org_url;
    char *src;
    char *dst;
    int hi;
    int lo;
    if (!strchr(url, '%'))
	return url;		/* nothing to decode */
    for (src = dst = url; *src != '\0'; src++) {
	/*
	 * src[2] is only inspected when src[1] is a hex digit (hence
	 * not NUL), so no byte past the terminator is ever read.
	 */
	if (*src == '%'
	    && (hi = url_hex_digit_value((unsigned char) src[1])) >= 0
	    && (lo = url_hex_digit_value((unsigned char) src[2])) >= 0) {
	    *dst++ = (char) (hi * 16 + lo);
	    src += 2;		/* skip the two digits just consumed */
	} else {
	    *dst++ = *src;
	}
    }
    *dst = '\0';
    return url;
}
109
110 void
111 urlInitialize(void)
112 {
113 debug(23, 5) ("urlInitialize: Initializing...\n");
114 assert(sizeof(ProtocolStr) == (PROTO_MAX + 1) * sizeof(char *));
115 memset(&null_request_flags, '\0', sizeof(null_request_flags));
116 }
117
118 method_t
119 urlParseMethod(const char *s)
120 {
121 if (strcasecmp(s, "GET") == 0) {
122 return METHOD_GET;
123 } else if (strcasecmp(s, "POST") == 0) {
124 return METHOD_POST;
125 } else if (strcasecmp(s, "PUT") == 0) {
126 return METHOD_PUT;
127 } else if (strcasecmp(s, "HEAD") == 0) {
128 return METHOD_HEAD;
129 } else if (strcasecmp(s, "CONNECT") == 0) {
130 return METHOD_CONNECT;
131 } else if (strcasecmp(s, "TRACE") == 0) {
132 return METHOD_TRACE;
133 } else if (strcasecmp(s, "PURGE") == 0) {
134 return METHOD_PURGE;
135 }
136 return METHOD_NONE;
137 }
138
139
140 protocol_t
141 urlParseProtocol(const char *s)
142 {
143 /* test common stuff first */
144 if (strcasecmp(s, "http") == 0)
145 return PROTO_HTTP;
146 if (strcasecmp(s, "ftp") == 0)
147 return PROTO_FTP;
148 if (strcasecmp(s, "https") == 0)
149 return PROTO_HTTPS;
150 if (strcasecmp(s, "file") == 0)
151 return PROTO_FTP;
152 if (strcasecmp(s, "gopher") == 0)
153 return PROTO_GOPHER;
154 if (strcasecmp(s, "wais") == 0)
155 return PROTO_WAIS;
156 if (strcasecmp(s, "cache_object") == 0)
157 return PROTO_CACHEOBJ;
158 if (strcasecmp(s, "urn") == 0)
159 return PROTO_URN;
160 if (strcasecmp(s, "whois") == 0)
161 return PROTO_WHOIS;
162 if (strcasecmp(s, "internal") == 0)
163 return PROTO_INTERNAL;
164 return PROTO_NONE;
165 }
166
167
168 int
169 urlDefaultPort(protocol_t p)
170 {
171 switch (p) {
172 case PROTO_HTTP:
173 return 80;
174 case PROTO_HTTPS:
175 return 443;
176 case PROTO_FTP:
177 return 21;
178 case PROTO_GOPHER:
179 return 70;
180 case PROTO_WAIS:
181 return 210;
182 case PROTO_CACHEOBJ:
183 case PROTO_INTERNAL:
184 return CACHE_HTTP_PORT;
185 case PROTO_WHOIS:
186 return 43;
187 default:
188 return 0;
189 }
190 }
191
192 request_t *
193 urlParse(method_t method, char *url)
194 {
195 LOCAL_ARRAY(char, proto, MAX_URL);
196 LOCAL_ARRAY(char, login, MAX_URL);
197 LOCAL_ARRAY(char, host, MAX_URL);
198 LOCAL_ARRAY(char, urlpath, MAX_URL);
199 request_t *request = NULL;
200 char *t = NULL;
201 int port;
202 protocol_t protocol = PROTO_NONE;
203 int l;
204 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
205
206 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
207 /* terminate so it doesn't overflow other buffers */
208 *(url + (MAX_URL >> 1)) = '\0';
209 debug(23, 1) ("urlParse: URL too large (%d bytes)\n", l);
210 return NULL;
211 }
212 if (method == METHOD_CONNECT) {
213 port = CONNECT_PORT;
214 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
215 return NULL;
216 } else if (!strncmp(url, "urn:", 4)) {
217 return urnParse(method, url);
218 } else {
219 if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
220 return NULL;
221 protocol = urlParseProtocol(proto);
222 port = urlDefaultPort(protocol);
223 /* Is there any login informaiton? */
224 if ((t = strrchr(host, '@'))) {
225 strcpy(login, host);
226 t = strrchr(login, '@');
227 *t = 0;
228 strcpy(host, t + 1);
229 }
230 if ((t = strrchr(host, ':'))) {
231 *t++ = '\0';
232 if (*t != '\0')
233 port = atoi(t);
234 }
235 }
236 for (t = host; *t; t++)
237 *t = xtolower(*t);
238 if (strspn(host, valid_hostname_chars) != strlen(host)) {
239 debug(23, 1) ("urlParse: Illegal character in hostname '%s'\n", host);
240 return NULL;
241 }
242 /* remove trailing dots from hostnames */
243 while ((l = strlen(host)) > 0 && host[--l] == '.')
244 host[l] = '\0';
245 if (Config.appendDomain && !strchr(host, '.'))
246 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN);
247 if (port == 0) {
248 debug(23, 3) ("urlParse: Invalid port == 0\n");
249 return NULL;
250 }
251 #ifdef HARDCODE_DENY_PORTS
252 /* These ports are filtered in the default squid.conf, but
253 * maybe someone wants them hardcoded... */
254 if (port == 7 || port == 9 || port = 19) {
255 debug(23, 0) ("urlParse: Deny access to port %d\n", port);
256 return NULL;
257 }
258 #endif
259 if (stringHasWhitespace(urlpath)) {
260 debug(23, 2) ("urlParse: URI has whitespace: {%s}\n", url);
261 switch (Config.uri_whitespace) {
262 case URI_WHITESPACE_DENY:
263 return NULL;
264 case URI_WHITESPACE_ALLOW:
265 break;
266 case URI_WHITESPACE_ENCODE:
267 t = rfc1738_escape(urlpath);
268 xstrncpy(urlpath, t, MAX_URL);
269 break;
270 case URI_WHITESPACE_CHOP:
271 *(urlpath + strcspn(urlpath, w_space)) = '\0';
272 break;
273 }
274 }
275 request = requestCreate(method, protocol, urlpath);
276 xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
277 xstrncpy(request->login, login, MAX_LOGIN_SZ);
278 request->port = (u_short) port;
279 return request;
280 }
281
282 static request_t *
283 urnParse(method_t method, char *urn)
284 {
285 debug(50, 5) ("urnParse: %s\n", urn);
286 return requestCreate(method, PROTO_URN, urn + 4);
287 }
288
289 const char *
290 urlCanonical(request_t * request)
291 {
292 LOCAL_ARRAY(char, portbuf, 32);
293 LOCAL_ARRAY(char, urlbuf, MAX_URL);
294 if (request->canonical)
295 return request->canonical;
296 if (request->protocol == PROTO_URN) {
297 snprintf(urlbuf, MAX_URL, "urn:%s", strBuf(request->urlpath));
298 } else {
299 switch (request->method) {
300 case METHOD_CONNECT:
301 snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
302 break;
303 default:
304 portbuf[0] = '\0';
305 if (request->port != urlDefaultPort(request->protocol))
306 snprintf(portbuf, 32, ":%d", request->port);
307 snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
308 ProtocolStr[request->protocol],
309 request->login,
310 *request->login ? "@" : null_string,
311 request->host,
312 portbuf,
313 strBuf(request->urlpath));
314 break;
315 }
316 }
317 return (request->canonical = xstrdup(urlbuf));
318 }
319
320 char *
321 urlCanonicalClean(const request_t * request)
322 {
323 LOCAL_ARRAY(char, buf, MAX_URL);
324 LOCAL_ARRAY(char, portbuf, 32);
325 LOCAL_ARRAY(char, loginbuf, MAX_LOGIN_SZ + 1);
326 char *t;
327 if (request->protocol == PROTO_URN) {
328 snprintf(buf, MAX_URL, "urn:%s", strBuf(request->urlpath));
329 } else {
330 switch (request->method) {
331 case METHOD_CONNECT:
332 snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
333 break;
334 default:
335 portbuf[0] = '\0';
336 if (request->port != urlDefaultPort(request->protocol))
337 snprintf(portbuf, 32, ":%d", request->port);
338 loginbuf[0] = '\0';
339 if ((int) strlen(request->login) > 0) {
340 strcpy(loginbuf, request->login);
341 if ((t = strchr(loginbuf, ':')))
342 *t = '\0';
343 strcat(loginbuf, "@");
344 }
345 snprintf(buf, MAX_URL, "%s://%s%s%s%s",
346 ProtocolStr[request->protocol],
347 loginbuf,
348 request->host,
349 portbuf,
350 strBuf(request->urlpath));
351 /*
352 * strip arguments AFTER a question-mark
353 */
354 if (Config.onoff.strip_query_terms)
355 if ((t = strchr(buf, '?')))
356 *(++t) = '\0';
357 break;
358 }
359 }
360 if (stringHasWhitespace(buf))
361 xstrncpy(buf, rfc1738_escape(buf), MAX_URL);
362 return buf;
363 }
364
/*
 * matchDomainName
 *
 * Test whether `host' falls under `domain'.  Returns 1 on a match and
 * 0 otherwise.  The comparison ignores case.
 *
 * `domain' must equal the tail of `host', and that tail must begin on
 * a label boundary: either `domain' itself starts with '.', or the
 * tail is the whole of `host', or the character just before the tail
 * is a '.'.  A host shorter than the domain never matches, so
 * ".foo.com" does not match the bare host "foo.com".
 */
int
matchDomainName(const char *domain, const char *host)
{
    const size_t domain_len = strlen(domain);
    const size_t host_len = strlen(host);
    const char *tail;
    if (host_len < domain_len)
	return 0;		/* host too short */
    tail = host + (host_len - domain_len);
    if (strcasecmp(domain, tail) != 0)
	return 0;		/* no match at all */
    return domain[0] == '.' || tail == host || tail[-1] == '.';
}
381
382 int
383 urlCheckRequest(const request_t * r)
384 {
385 int rc = 0;
386 /* protocol "independent" methods */
387 if (r->method == METHOD_CONNECT)
388 return 1;
389 if (r->method == METHOD_TRACE)
390 return 1;
391 if (r->method == METHOD_PURGE)
392 return 1;
393 /* does method match the protocol? */
394 switch (r->protocol) {
395 case PROTO_URN:
396 case PROTO_HTTP:
397 case PROTO_HTTPS:
398 case PROTO_CACHEOBJ:
399 rc = 1;
400 break;
401 case PROTO_FTP:
402 if (r->method == METHOD_PUT)
403 rc = 1;
404 case PROTO_GOPHER:
405 case PROTO_WAIS:
406 case PROTO_WHOIS:
407 if (r->method == METHOD_GET)
408 rc = 1;
409 else if (r->method == METHOD_HEAD)
410 rc = 1;
411 break;
412 default:
413 break;
414 }
415 return rc;
416 }
417
418 /*
419 * Quick-n-dirty host extraction from a URL. Steps:
420 * Look for a colon
421 * Skip any '/' after the colon
422 * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[]
423 * Look for an ending '/' or ':' and terminate
424 * Look for login info preceeded by '@'
425 */
426 char *
427 urlHostname(const char *url)
428 {
429 LOCAL_ARRAY(char, host, SQUIDHOSTNAMELEN);
430 char *t;
431 host[0] = '\0';
432 if (NULL == (t = strchr(url, ':')))
433 return NULL;
434 t++;
435 while (*t != '\0' && *t == '/')
436 t++;
437 xstrncpy(host, t, SQUIDHOSTNAMELEN);
438 if ((t = strchr(host, '/')))
439 *t = '\0';
440 if ((t = strchr(host, ':')))
441 *t = '\0';
442 if ((t = strrchr(host, '@'))) {
443 t++;
444 xmemmove(host, t, strlen(t) + 1);
445 }
446 return host;
447 }