]> git.ipfire.org Git - thirdparty/squid.git/blob - lib/rfc1738.c
Merge from trunk
[thirdparty/squid.git] / lib / rfc1738.c
1 /*
2 * $Id: rfc1738.c,v 1.28 2007/12/06 02:37:15 amosjeffries Exp $
3 *
4 * DEBUG:
5 * AUTHOR: Harvest Derived
6 *
7 * SQUID Web Proxy Cache http://www.squid-cache.org/
8 * ----------------------------------------------------------
9 *
10 * Squid is the result of efforts by numerous individuals from
11 * the Internet community; see the CONTRIBUTORS file for full
12 * details. Many organizations have provided support for Squid's
13 * development; see the SPONSORS file for full details. Squid is
14 * Copyrighted (C) 2001 by the Regents of the University of
15 * California; see the COPYRIGHT file for full details. Squid
16 * incorporates software developed and/or copyrighted by other
17 * sources; see the CREDITS file for full details.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
32 *
33 */
34
35 #include "config.h"
36
37 #if HAVE_STDIO_H
38 #include <stdio.h>
39 #endif
40 #if HAVE_STRING_H
41 #include <string.h>
42 #endif
43
44 #include "util.h"
45
/*
 * Characters that RFC 1738 classifies as "unsafe". rfc1738_do_escape()
 * percent-encodes every character found in this table, whatever escaping
 * mode it was called with.
 *
 * Not listed here, because rfc1738_do_escape() tests for them directly:
 *   - '%' (0x25), whose escaping depends on the requested mode;
 *   - control characters 0x00-0x1F, DEL (0x7F) and any non-US-ASCII
 *     byte (0x80 and above).
 *
 * The table is pure lookup data and is never modified, hence const.
 */
static const char rfc1738_unsafe_chars[] = {
    0x3C,                       /* < */
    0x3E,                       /* > */
    0x22,                       /* " */
    0x23,                       /* # */
    0x7B,                       /* { */
    0x7D,                       /* } */
    0x7C,                       /* | */
    0x5C,                       /* \ */
    0x5E,                       /* ^ */
    0x7E,                       /* ~ */
    0x5B,                       /* [ */
    0x5D,                       /* ] */
    0x60,                       /* ` */
    0x27,                       /* ' */
    0x20                        /* space */
};
71
/*
 * Characters that RFC 1738 classifies as "reserved" (they carry special
 * meaning inside a URL). rfc1738_do_escape() percent-encodes them only
 * when called with encode_reserved > 0.
 *
 * The table is pure lookup data and is never modified, hence const.
 */
static const char rfc1738_reserved_chars[] = {
    0x3B,                       /* ; */
    0x2F,                       /* / */
    0x3F,                       /* ? */
    0x3A,                       /* : */
    0x40,                       /* @ */
    0x3D,                       /* = */
    0x26                        /* & */
};
82
83 /*
84 * rfc1738_escape - Returns a static buffer contains the RFC 1738
85 * compliant, escaped version of the given url.
86 */
87 static char *
88 rfc1738_do_escape(const char *url, int encode_reserved)
89 {
90 static char *buf;
91 static size_t bufsize = 0;
92 const char *p;
93 char *q;
94 unsigned int i, do_escape;
95
96 if (buf == NULL || strlen(url) * 3 > bufsize) {
97 xfree(buf);
98 bufsize = strlen(url) * 3 + 1;
99 buf = xcalloc(bufsize, 1);
100 }
101 for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) {
102 do_escape = 0;
103
104 /* RFC 1738 defines these chars as unsafe */
105 for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) {
106 if (*p == rfc1738_unsafe_chars[i]) {
107 do_escape = 1;
108 break;
109 }
110 }
111 /* Handle % separately */
112 if (encode_reserved >= 0 && *p == '%')
113 do_escape = 1;
114 /* RFC 1738 defines these chars as reserved */
115 for (i = 0; i < sizeof(rfc1738_reserved_chars) && encode_reserved > 0; i++) {
116 if (*p == rfc1738_reserved_chars[i]) {
117 do_escape = 1;
118 break;
119 }
120 }
121 /* RFC 1738 says any control chars (0x00-0x1F) are encoded */
122 if ((unsigned char) *p <= (unsigned char) 0x1F) {
123 do_escape = 1;
124 }
125 /* RFC 1738 says 0x7f is encoded */
126 if (*p == (char) 0x7F) {
127 do_escape = 1;
128 }
129 /* RFC 1738 says any non-US-ASCII are encoded */
130 if (((unsigned char) *p >= (unsigned char) 0x80)) {
131 do_escape = 1;
132 }
133 /* Do the triplet encoding, or just copy the char */
134 /* note: we do not need snprintf here as q is appropriately
135 * allocated - KA */
136
137 if (do_escape == 1) {
138 (void) sprintf(q, "%%%02X", (unsigned char) *p);
139 q += sizeof(char) * 2;
140 } else {
141 *q = *p;
142 }
143 }
144 *q = '\0';
145 return (buf);
146 }
147
/*
 * rfc1738_escape - RFC 1738 escape a whole URL.
 *
 * Runs rfc1738_do_escape() with encode_reserved == 0: unsafe characters
 * and '%' are percent-encoded, while reserved characters such as '/',
 * '?' and ':' are left alone.
 *
 * The result is held in rfc1738_do_escape()'s static buffer, which is
 * shared by all the escape functions and overwritten on the next call.
 */
char *
rfc1738_escape(const char *url)
{
    char *const escaped = rfc1738_do_escape(url, 0);

    return escaped;
}
157
/*
 * rfc1738_escape_unescaped - RFC 1738 escape a URL, leaving '%' alone.
 *
 * Runs rfc1738_do_escape() with encode_reserved == -1: unsafe characters
 * are percent-encoded, but '%' is copied through unchanged and reserved
 * characters are not encoded.
 *
 * The result is held in rfc1738_do_escape()'s static buffer, which is
 * shared by all the escape functions and overwritten on the next call.
 */
char *
rfc1738_escape_unescaped(const char *url)
{
    char *const escaped = rfc1738_do_escape(url, -1);

    return escaped;
}
167
/*
 * rfc1738_escape_part - RFC 1738 escape a single URL segment.
 *
 * Runs rfc1738_do_escape() with encode_reserved == 1: in addition to
 * the unsafe characters and '%', the reserved characters
 * (; / ? : @ = &) are percent-encoded too.
 *
 * The result is held in rfc1738_do_escape()'s static buffer, which is
 * shared by all the escape functions and overwritten on the next call.
 */
char *
rfc1738_escape_part(const char *url)
{
    char *const escaped = rfc1738_do_escape(url, 1);

    return escaped;
}
177
/*
 * rfc1738_hex_value - value (0-15) of a single hexadecimal digit, or -1
 * if c is not a hex digit. The terminating NUL is therefore rejected too.
 */
static int
rfc1738_hex_value(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
 * rfc1738_unescape() - Decodes percent-escapes in the given string, in
 * place. The result is never longer than the input.
 *
 *   %%    becomes a single '%'.
 *   %xy   where x and y are both hexadecimal digits becomes the byte
 *         with that value.
 *   %00   is never decoded, so no NUL can be smuggled into the string;
 *         the '%' is kept and the two zeros are dropped.
 *   A '%' followed by anything else (non-hex characters, or the end of
 *   the string) is copied through unchanged.
 *
 * Both digits are validated explicitly. Parsing them with sscanf("%x")
 * would also accept leading whitespace, a sign, or a single digit
 * followed by junk, which mis-decodes input such as "%4g" and lets
 * "%0g" produce an embedded NUL.
 *
 * s may be NULL, in which case nothing is done.
 */
void
rfc1738_unescape(char *s)
{
    size_t i, j;                /* i is write, j is read; i <= j always */

    if (s == NULL) {
        return;
    }
    for (i = j = 0; s[j] != '\0'; i++, j++) {
        int hi, lo;

        s[i] = s[j];
        if (s[j] != '%') {
            continue;
        }
        if (s[j + 1] == '%') {  /* %% case */
            j++;
            continue;
        }
        /* s[j + 2] is only examined once s[j + 1] is known to be a hex
         * digit (and so not the terminator): no read past the end. */
        hi = rfc1738_hex_value(s[j + 1]);
        if (hi < 0) {
            continue;           /* malformed escape: keep the '%' literally */
        }
        lo = rfc1738_hex_value(s[j + 2]);
        if (lo < 0) {
            continue;           /* malformed escape: keep the '%' literally */
        }
        if (hi == 0 && lo == 0) {       /* %00 case */
            j += 2;
            continue;
        }
        s[i] = (char) ((hi << 4) | lo);
        j += 2;
    }
    s[i] = '\0';
}