]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/url.cc
3 * $Id: url.cc,v 1.111 1998/12/15 17:33:59 wessels Exp $
5 * DEBUG: section 23 URL Parsing
6 * AUTHOR: Duane Wessels
8 * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from the
12 * Internet community. Development is led by Duane Wessels of the
13 * National Laboratory for Applied Network Research and funded by the
14 * National Science Foundation. Squid is Copyrighted (C) 1998 by
15 * Duane Wessels and the University of California San Diego. Please
16 * see the COPYRIGHT file for full details. Squid incorporates
17 * software developed and/or copyrighted by other sources. Please see
18 * the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
38 const char *RequestMethodStr
[] =
50 const char *ProtocolStr
[] =
69 static const char *const hex
= "0123456789abcdef";
70 static request_t
*urnParse(method_t method
, char *urn
);
71 static const char *const valid_hostname_chars
=
72 #if ALLOW_HOSTNAME_UNDERSCORES
73 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
74 "abcdefghijklmnopqrstuvwxyz"
77 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
78 "abcdefghijklmnopqrstuvwxyz"
82 /* convert %xx in url string to a character
83 * Allocate a new string and return a pointer to converted string */
86 url_convert_hex(char *org_url
, int allocate
)
88 static char code
[] = "00";
92 url
= allocate
? (char *) xstrdup(org_url
) : org_url
;
93 if ((int) strlen(url
) < 3 || !strchr(url
, '%'))
95 for (s
= t
= url
; *(s
+ 2); s
++) {
99 *t
++ = (char) strtol(code
, NULL
, 16);
113 debug(23, 5) ("urlInitialize: Initializing...\n");
114 assert(sizeof(ProtocolStr
) == (PROTO_MAX
+ 1) * sizeof(char *));
115 memset(&null_request_flags
, '\0', sizeof(null_request_flags
));
119 urlParseMethod(const char *s
)
121 if (strcasecmp(s
, "GET") == 0) {
123 } else if (strcasecmp(s
, "POST") == 0) {
125 } else if (strcasecmp(s
, "PUT") == 0) {
127 } else if (strcasecmp(s
, "HEAD") == 0) {
129 } else if (strcasecmp(s
, "CONNECT") == 0) {
130 return METHOD_CONNECT
;
131 } else if (strcasecmp(s
, "TRACE") == 0) {
133 } else if (strcasecmp(s
, "PURGE") == 0) {
141 urlParseProtocol(const char *s
)
143 /* test common stuff first */
144 if (strcasecmp(s
, "http") == 0)
146 if (strcasecmp(s
, "ftp") == 0)
148 if (strcasecmp(s
, "https") == 0)
150 if (strcasecmp(s
, "file") == 0)
152 if (strcasecmp(s
, "gopher") == 0)
154 if (strcasecmp(s
, "wais") == 0)
156 if (strcasecmp(s
, "cache_object") == 0)
157 return PROTO_CACHEOBJ
;
158 if (strcasecmp(s
, "urn") == 0)
160 if (strcasecmp(s
, "whois") == 0)
162 if (strcasecmp(s
, "internal") == 0)
163 return PROTO_INTERNAL
;
169 urlDefaultPort(protocol_t p
)
184 return CACHE_HTTP_PORT
;
193 urlParse(method_t method
, char *url
)
195 LOCAL_ARRAY(char, proto
, MAX_URL
);
196 LOCAL_ARRAY(char, login
, MAX_URL
);
197 LOCAL_ARRAY(char, host
, MAX_URL
);
198 LOCAL_ARRAY(char, urlpath
, MAX_URL
);
199 request_t
*request
= NULL
;
202 protocol_t protocol
= PROTO_NONE
;
204 proto
[0] = host
[0] = urlpath
[0] = login
[0] = '\0';
206 if ((l
= strlen(url
)) + Config
.appendDomainLen
> (MAX_URL
- 1)) {
207 /* terminate so it doesn't overflow other buffers */
208 *(url
+ (MAX_URL
>> 1)) = '\0';
209 debug(23, 1) ("urlParse: URL too large (%d bytes)\n", l
);
212 if (method
== METHOD_CONNECT
) {
214 if (sscanf(url
, "%[^:]:%d", host
, &port
) < 1)
216 } else if (!strncmp(url
, "urn:", 4)) {
217 return urnParse(method
, url
);
219 if (sscanf(url
, "%[^:]://%[^/]%[^\r\n]", proto
, host
, urlpath
) < 2)
221 protocol
= urlParseProtocol(proto
);
222 port
= urlDefaultPort(protocol
);
223 /* Is there any login information? */
224 if ((t
= strrchr(host
, '@'))) {
226 t
= strrchr(login
, '@');
230 if ((t
= strrchr(host
, ':'))) {
236 for (t
= host
; *t
; t
++)
238 if (strspn(host
, valid_hostname_chars
) != strlen(host
)) {
239 debug(23, 1) ("urlParse: Illegal character in hostname '%s'\n", host
);
242 /* remove trailing dots from hostnames */
243 while ((l
= strlen(host
)) > 0 && host
[--l
] == '.')
245 if (Config
.appendDomain
&& !strchr(host
, '.'))
246 strncat(host
, Config
.appendDomain
, SQUIDHOSTNAMELEN
);
248 debug(23, 3) ("urlParse: Invalid port == 0\n");
251 #ifdef HARDCODE_DENY_PORTS
252 /* These ports are filtered in the default squid.conf, but
253 * maybe someone wants them hardcoded... */
254 if (port
== 7 || port
== 9 || port
= 19) {
255 debug(23, 0) ("urlParse: Deny access to port %d\n", port
);
259 if (stringHasWhitespace(urlpath
)) {
260 debug(23, 2) ("urlParse: URI has whitespace: {%s}\n", url
);
261 switch (Config
.uri_whitespace
) {
262 case URI_WHITESPACE_DENY
:
264 case URI_WHITESPACE_ALLOW
:
266 case URI_WHITESPACE_ENCODE
:
267 t
= rfc1738_escape(urlpath
);
268 xstrncpy(urlpath
, t
, MAX_URL
);
270 case URI_WHITESPACE_CHOP
:
271 *(urlpath
+ strcspn(urlpath
, w_space
)) = '\0';
275 request
= requestCreate(method
, protocol
, urlpath
);
276 xstrncpy(request
->host
, host
, SQUIDHOSTNAMELEN
);
277 xstrncpy(request
->login
, login
, MAX_LOGIN_SZ
);
278 request
->port
= (u_short
) port
;
283 urnParse(method_t method
, char *urn
)
285 debug(50, 5) ("urnParse: %s\n", urn
);
286 return requestCreate(method
, PROTO_URN
, urn
+ 4);
290 urlCanonical(request_t
* request
)
292 LOCAL_ARRAY(char, portbuf
, 32);
293 LOCAL_ARRAY(char, urlbuf
, MAX_URL
);
294 if (request
->canonical
)
295 return request
->canonical
;
296 if (request
->protocol
== PROTO_URN
) {
297 snprintf(urlbuf
, MAX_URL
, "urn:%s", strBuf(request
->urlpath
));
299 switch (request
->method
) {
301 snprintf(urlbuf
, MAX_URL
, "%s:%d", request
->host
, request
->port
);
305 if (request
->port
!= urlDefaultPort(request
->protocol
))
306 snprintf(portbuf
, 32, ":%d", request
->port
);
307 snprintf(urlbuf
, MAX_URL
, "%s://%s%s%s%s%s",
308 ProtocolStr
[request
->protocol
],
310 *request
->login
? "@" : null_string
,
313 strBuf(request
->urlpath
));
317 return (request
->canonical
= xstrdup(urlbuf
));
321 urlCanonicalClean(const request_t
* request
)
323 LOCAL_ARRAY(char, buf
, MAX_URL
);
324 LOCAL_ARRAY(char, portbuf
, 32);
325 LOCAL_ARRAY(char, loginbuf
, MAX_LOGIN_SZ
+ 1);
327 if (request
->protocol
== PROTO_URN
) {
328 snprintf(buf
, MAX_URL
, "urn:%s", strBuf(request
->urlpath
));
330 switch (request
->method
) {
332 snprintf(buf
, MAX_URL
, "%s:%d", request
->host
, request
->port
);
336 if (request
->port
!= urlDefaultPort(request
->protocol
))
337 snprintf(portbuf
, 32, ":%d", request
->port
);
339 if ((int) strlen(request
->login
) > 0) {
340 strcpy(loginbuf
, request
->login
);
341 if ((t
= strchr(loginbuf
, ':')))
343 strcat(loginbuf
, "@");
345 snprintf(buf
, MAX_URL
, "%s://%s%s%s%s",
346 ProtocolStr
[request
->protocol
],
350 strBuf(request
->urlpath
));
352 * strip arguments AFTER a question-mark
354 if (Config
.onoff
.strip_query_terms
)
355 if ((t
= strchr(buf
, '?')))
360 if (stringHasWhitespace(buf
))
361 xstrncpy(buf
, rfc1738_escape(buf
), MAX_URL
);
366 matchDomainName(const char *domain
, const char *host
)
369 if ((offset
= strlen(host
) - strlen(domain
)) < 0)
370 return 0; /* host too short */
371 if (strcasecmp(domain
, host
+ offset
) != 0)
372 return 0; /* no match at all */
377 if (*(host
+ offset
- 1) == '.')
383 urlCheckRequest(const request_t
* r
)
386 /* protocol "independent" methods */
387 if (r
->method
== METHOD_CONNECT
)
389 if (r
->method
== METHOD_TRACE
)
391 if (r
->method
== METHOD_PURGE
)
393 /* does method match the protocol? */
394 switch (r
->protocol
) {
402 if (r
->method
== METHOD_PUT
)
407 if (r
->method
== METHOD_GET
)
409 else if (r
->method
== METHOD_HEAD
)
419 * Quick-n-dirty host extraction from a URL. Steps:
421 * Skip any '/' after the colon
422 * Copy the next SQUIDHOSTNAMELEN bytes to host[]
423 * Look for an ending '/' or ':' and terminate
424 * Look for login info preceded by '@'
427 urlHostname(const char *url
)
429 LOCAL_ARRAY(char, host
, SQUIDHOSTNAMELEN
);
432 if (NULL
== (t
= strchr(url
, ':')))
435 while (*t
!= '\0' && *t
== '/')
437 xstrncpy(host
, t
, SQUIDHOSTNAMELEN
);
438 if ((t
= strchr(host
, '/')))
440 if ((t
= strchr(host
, ':')))
442 if ((t
= strrchr(host
, '@'))) {
444 xmemmove(host
, t
, strlen(t
) + 1);