]> git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
LINT
[thirdparty/squid.git] / src / url.cc
1
2 /*
3 * $Id: url.cc,v 1.66 1997/11/05 05:29:40 wessels Exp $
4 *
5 * DEBUG: section 23 URL Parsing
6 * AUTHOR: Duane Wessels
7 *
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * --------------------------------------------------------
10 *
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by
14 * the National Science Foundation.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 *
30 */
31
32 #include "squid.h"
33
/* Printable names of the request methods, one string per table slot. */
const char *RequestMethodStr[] = {
    "NONE", "GET", "POST", "PUT",
    "HEAD", "CONNECT", "TRACE", "PURGE"
};
45
/*
 * Printable names of the URL schemes; urlCanonical() and
 * urlCanonicalClean() index this table with request->protocol.
 */
const char *ProtocolStr[] = {
    "NONE", "http", "ftp", "gopher",
    "wais", "cache_object", "TOTAL"
};
56
/* Per-byte flag table: non-zero means url_escape() copies the byte as is. */
static int url_acceptable[256];
/* Lower-case hexadecimal digits, indexed by nibble value. */
static const char hex[] = "0123456789abcdef";
59
/*
 * url_convert_hex - decode "%xx" escape sequences in a URL string.
 *
 * org_url   NUL-terminated string to decode.
 * allocate  non-zero: decode a copy obtained from xstrdup() and return
 *           that copy; zero: decode org_url in place and return it.
 *
 * A '%' is decoded only when it is followed by two hexadecimal digits.
 * Incomplete or malformed sequences ("%", "%4", "%zz") are copied through
 * unchanged.  Note that "%00" decodes to a NUL byte, which ends the
 * resulting C string at that point.
 */
char *
url_convert_hex(char *org_url, int allocate)
{
    char code[3];
    char *url = NULL;
    const char *s = NULL;
    char *t = NULL;

    url = allocate ? (char *) xstrdup(org_url) : org_url;

    /* Nothing to do if there is no room for, or no sign of, an escape. */
    if (strlen(url) < 3 || !strchr(url, '%'))
        return url;

    code[2] = '\0';
    /*
     * The output never grows, so decode in place: s reads, t writes.
     * The isxdigit() tests short-circuit on the terminator, so s never
     * looks beyond the end of the string.
     */
    for (s = t = url; *s != '\0';) {
        if (s[0] == '%'
            && isxdigit((unsigned char) s[1])
            && isxdigit((unsigned char) s[2])) {
            code[0] = s[1];
            code[1] = s[2];
            *t++ = (char) strtol(code, NULL, 16);
            s += 3;
        } else {
            *t++ = *s++;
        }
    }
    *t = '\0';
    return url;
}
90
91
92 /* INIT Acceptable table.
93 * Borrow from libwww2 with Mosaic2.4 Distribution */
94 void
95 urlInitialize(void)
96 {
97 unsigned int i;
98 char *good =
99 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$";
100 debug(23, 5) ("urlInitialize: Initializing...\n");
101 for (i = 0; i < 256; i++)
102 url_acceptable[i] = 0;
103 for (; *good; good++)
104 url_acceptable[(unsigned int) *good] = 1;
105 }
106
107
108 /* Encode prohibited char in string */
109 /* return the pointer to new (allocated) string */
110 char *
111 url_escape(const char *url)
112 {
113 const char *p;
114 char *q;
115 char *tmpline = xcalloc(1, MAX_URL);
116
117 q = tmpline;
118 for (p = url; *p; p++) {
119 if (url_acceptable[(int) (*p)])
120 *q++ = *p;
121 else {
122 *q++ = '%'; /* Means hex coming */
123 *q++ = hex[(int) ((*p) >> 4)];
124 *q++ = hex[(int) ((*p) & 15)];
125 }
126 }
127 *q++ = '\0';
128 return tmpline;
129 }
130
131 method_t
132 urlParseMethod(const char *s)
133 {
134 if (strcasecmp(s, "GET") == 0) {
135 return METHOD_GET;
136 } else if (strcasecmp(s, "POST") == 0) {
137 return METHOD_POST;
138 } else if (strcasecmp(s, "PUT") == 0) {
139 return METHOD_PUT;
140 } else if (strcasecmp(s, "HEAD") == 0) {
141 return METHOD_HEAD;
142 } else if (strcasecmp(s, "CONNECT") == 0) {
143 return METHOD_CONNECT;
144 } else if (strcasecmp(s, "TRACE") == 0) {
145 return METHOD_TRACE;
146 } else if (strcasecmp(s, "PURGE") == 0) {
147 return METHOD_PURGE;
148 }
149 return METHOD_NONE;
150 }
151
152
153 protocol_t
154 urlParseProtocol(const char *s)
155 {
156 if (strncasecmp(s, "http", 4) == 0)
157 return PROTO_HTTP;
158 if (strncasecmp(s, "ftp", 3) == 0)
159 return PROTO_FTP;
160 #ifndef NO_FTP_FOR_FILE
161 if (strncasecmp(s, "file", 4) == 0)
162 return PROTO_FTP;
163 #endif
164 if (strncasecmp(s, "gopher", 6) == 0)
165 return PROTO_GOPHER;
166 if (strncasecmp(s, "wais", 4) == 0)
167 return PROTO_WAIS;
168 if (strncasecmp(s, "cache_object", 12) == 0)
169 return PROTO_CACHEOBJ;
170 return PROTO_NONE;
171 }
172
173
174 int
175 urlDefaultPort(protocol_t p)
176 {
177 switch (p) {
178 case PROTO_HTTP:
179 return 80;
180 case PROTO_FTP:
181 return 21;
182 case PROTO_GOPHER:
183 return 70;
184 case PROTO_WAIS:
185 return 210;
186 case PROTO_CACHEOBJ:
187 return CACHE_HTTP_PORT;
188 default:
189 return 0;
190 }
191 }
192
193 request_t *
194 urlParse(method_t method, char *url)
195 {
196 LOCAL_ARRAY(char, proto, MAX_URL);
197 LOCAL_ARRAY(char, login, MAX_URL);
198 LOCAL_ARRAY(char, host, MAX_URL);
199 LOCAL_ARRAY(char, urlpath, MAX_URL);
200 request_t *request = NULL;
201 char *t = NULL;
202 int port;
203 protocol_t protocol = PROTO_NONE;
204 int l;
205 proto[0] = host[0] = urlpath[0] = login[0] = '\0';
206
207 if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
208 /* terminate so it doesn't overflow other buffers */
209 *(url + (MAX_URL >> 1)) = '\0';
210 debug(23, 0) ("urlParse: URL too large (%d bytes)\n", l);
211 return NULL;
212 }
213 if (method == METHOD_CONNECT) {
214 port = CONNECT_PORT;
215 if (sscanf(url, "%[^:]:%d", host, &port) < 1)
216 return NULL;
217 } else {
218 if (sscanf(url, "%[^:]://%[^/]%s", proto, host, urlpath) < 2)
219 return NULL;
220 protocol = urlParseProtocol(proto);
221 port = urlDefaultPort(protocol);
222 /* Is there any login informaiton? */
223 if ((t = strrchr(host, '@'))) {
224 strcpy(login, host);
225 t = strrchr(login, '@');
226 *t = 0;
227 strcpy(host, t + 1);
228 }
229 if ((t = strrchr(host, ':'))) {
230 *t++ = '\0';
231 if (*t != '\0')
232 port = atoi(t);
233 }
234 }
235 for (t = host; *t; t++)
236 *t = tolower(*t);
237 /* remove trailing dots from hostnames */
238 while ((l = strlen(host)) > 0 && host[--l] == '.')
239 host[l] = '\0';
240 if (Config.appendDomain && !strchr(host, '.'))
241 strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN);
242 if (port == 0) {
243 debug(23, 0) ("urlParse: Invalid port == 0\n");
244 return NULL;
245 }
246 #ifdef HARDCODE_DENY_PORTS
247 /* These ports are filtered in the default squid.conf, but
248 * maybe someone wants them hardcoded... */
249 if (port == 7 || port == 9 || port = 19) {
250 debug(23, 0) ("urlParse: Deny access to port %d\n", port);
251 return NULL;
252 }
253 #endif
254 #ifdef REMOVE_FTP_TRAILING_SLASHES
255 /* remove trailing slashes from FTP URLs */
256 if (protocol == PROTO_FTP) {
257 t = urlpath + strlen(urlpath);
258 while (t > urlpath && *(--t) == '/')
259 *t = '\0';
260 }
261 #endif
262 request = get_free_request_t();
263 request->method = method;
264 request->protocol = protocol;
265 xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
266 xstrncpy(request->login, login, MAX_LOGIN_SZ);
267 request->port = (u_short) port;
268 xstrncpy(request->urlpath, urlpath, MAX_URL);
269 request->max_age = -1;
270 request->max_forwards = -1;
271 return request;
272 }
273
274 char *
275 urlCanonical(const request_t * request, char *buf)
276 {
277 LOCAL_ARRAY(char, urlbuf, MAX_URL);
278 LOCAL_ARRAY(char, portbuf, 32);
279 if (buf == NULL)
280 buf = urlbuf;
281 switch (request->method) {
282 case METHOD_CONNECT:
283 snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
284 break;
285 default:
286 portbuf[0] = '\0';
287 if (request->port != urlDefaultPort(request->protocol))
288 snprintf(portbuf, 32, ":%d", request->port);
289 snprintf(buf, MAX_URL, "%s://%s%s%s%s%s",
290 ProtocolStr[request->protocol],
291 request->login,
292 *request->login ? "@" : null_string,
293 request->host,
294 portbuf,
295 request->urlpath);
296 break;
297 }
298 return buf;
299 }
300
301 char *
302 urlCanonicalClean(const request_t * request)
303 {
304 LOCAL_ARRAY(char, buf, MAX_URL);
305 LOCAL_ARRAY(char, portbuf, 32);
306 char *t;
307 switch (request->method) {
308 case METHOD_CONNECT:
309 snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
310 break;
311 default:
312 portbuf[0] = '\0';
313 if (request->port != urlDefaultPort(request->protocol))
314 snprintf(portbuf, 32, ":%d", request->port);
315 snprintf(buf, MAX_URL, "%s://%s%s%s",
316 ProtocolStr[request->protocol],
317 request->host,
318 portbuf,
319 request->urlpath);
320 if ((t = strchr(buf, '?')))
321 *t = '\0';
322 break;
323 }
324 return buf;
325 }
326
327 char *
328 urlClean(char *dirty)
329 {
330 char *clean;
331 request_t *r = urlParse(METHOD_GET, dirty);
332 if (r == NULL)
333 return dirty;
334 clean = urlCanonicalClean(r);
335 put_free_request_t(r);
336 return clean;
337 }
338
339
340 request_t *
341 requestLink(request_t * request)
342 {
343 request->link_count++;
344 return request;
345 }
346
347 void
348 requestUnlink(request_t * request)
349 {
350 if (request == NULL)
351 return;
352 request->link_count--;
353 if (request->link_count)
354 return;
355 safe_free(request->headers);
356 put_free_request_t(request);
357 }
358
/*
 * matchDomainName - test whether host lies within domain.
 *
 * Returns 1 when host ends with domain (case-insensitive) and the match
 * falls on a label boundary: domain itself starts with '.', host and
 * domain have the same length, or the character just before the matched
 * suffix in host is '.'.  Returns 0 otherwise.
 */
int
matchDomainName(const char *domain, const char *host)
{
    size_t dlen = strlen(domain);
    size_t hlen = strlen(host);
    size_t offset;

    if (hlen < dlen)
        return 0;               /* host too short */
    offset = hlen - dlen;
    if (strcasecmp(domain, host + offset) != 0)
        return 0;               /* no match at all */
    if (*domain == '.')
        return 1;
    /* test for an exact-length match first: host[offset - 1] does not
     * exist when offset is 0 */
    if (offset == 0)
        return 1;
    if (host[offset - 1] == '.')
        return 1;
    return 0;
}
375
376 int
377 urlCheckRequest(const request_t * r)
378 {
379 int rc = 0;
380 if (r->method == METHOD_CONNECT)
381 return 1;
382 if (r->method == METHOD_TRACE)
383 return 1;
384 if (r->method == METHOD_PURGE)
385 return 1;
386 switch (r->protocol) {
387 case PROTO_HTTP:
388 case PROTO_CACHEOBJ:
389 rc = 1;
390 break;
391 case PROTO_FTP:
392 case PROTO_GOPHER:
393 case PROTO_WAIS:
394 if (r->method == METHOD_GET)
395 rc = 1;
396 break;
397 default:
398 break;
399 }
400 return rc;
401 }