]> git.ipfire.org Git - thirdparty/squid.git/blob - lib/rfc1738.c
Merge from trunk
[thirdparty/squid.git] / lib / rfc1738.c
1 /*
2 * DEBUG:
3 * AUTHOR: Harvest Derived
4 *
5 * SQUID Web Proxy Cache http://www.squid-cache.org/
6 * ----------------------------------------------------------
7 *
8 * Squid is the result of efforts by numerous individuals from
9 * the Internet community; see the CONTRIBUTORS file for full
10 * details. Many organizations have provided support for Squid's
11 * development; see the SPONSORS file for full details. Squid is
12 * Copyrighted (C) 2001 by the Regents of the University of
13 * California; see the COPYRIGHT file for full details. Squid
14 * incorporates software developed and/or copyrighted by other
15 * sources; see the CREDITS file for full details.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
30 *
31 */
32
33 #include "squid.h"
34 #include "rfc1738.h"
35
36 #if HAVE_STRING_H
37 #include <string.h>
38 #endif
39
/*
 * RFC 1738 defines that these characters should be escaped, as well as
 * any non-US-ASCII character or anything between 0x00 - 0x1F.
 *
 * Space (0x20) and percent (0x25) are deliberately absent from this
 * table: rfc1738_do_escape() tests for them directly so that they can
 * be switched off individually through its flags.
 *
 * The table is read-only lookup data, hence const.
 */
static const char rfc1738_unsafe_chars[] = {
    0x3C,                       /* < */
    0x3E,                       /* > */
    0x22,                       /* " */
    0x23,                       /* # */
    0x7B,                       /* { */
    0x7D,                       /* } */
    0x7C,                       /* | */
    0x5C,                       /* \ */
    0x5E,                       /* ^ */
    0x7E,                       /* ~ */
    0x5B,                       /* [ */
    0x5D,                       /* ] */
    0x60,                       /* ` */
    0x27                        /* ' */
};
64
/*
 * Characters that rfc1738_do_escape() encodes when the
 * RFC1738_ESCAPE_RESERVED flag is given.
 *
 * The table is read-only lookup data, hence const.
 */
static const char rfc1738_reserved_chars[] = {
    0x3B,                       /* ; */
    0x2F,                       /* / */
    0x3F,                       /* ? */
    0x3A,                       /* : */
    0x40,                       /* @ */
    0x3D,                       /* = */
    0x26                        /* & */
};
74
75 /*
76 * rfc1738_escape - Returns a static buffer contains the RFC 1738
77 * compliant, escaped version of the given url.
78 */
79 char *
80 rfc1738_do_escape(const char *url, int flags)
81 {
82 static char *buf;
83 static size_t bufsize = 0;
84 const char *src;
85 char *dst;
86 unsigned int i, do_escape;
87
88 if (buf == NULL || strlen(url) * 3 > bufsize) {
89 xfree(buf);
90 bufsize = strlen(url) * 3 + 1;
91 buf = (char*)xcalloc(bufsize, 1);
92 }
93 for (src = url, dst = buf; *src != '\0' && dst < (buf + bufsize - 1); src++, dst++) {
94
95 /* a-z, A-Z and 0-9 are SAFE. */
96 if ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9')) {
97 *dst = *src;
98 continue;
99 }
100
101 do_escape = 0;
102
103 /* RFC 1738 defines these chars as unsafe */
104 if ((flags & RFC1738_ESCAPE_UNSAFE)) {
105 for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) {
106 if (*src == rfc1738_unsafe_chars[i]) {
107 do_escape = 1;
108 break;
109 }
110 }
111 /* Handle % separately */
112 if (!(flags & RFC1738_ESCAPE_NOPERCENT) && *src == '%')
113 do_escape = 1;
114 /* Handle space separately */
115 else if (!(flags & RFC1738_ESCAPE_NOSPACE) && *src <= ' ')
116 do_escape = 1;
117 }
118 /* RFC 1738 defines these chars as reserved */
119 if ((flags & RFC1738_ESCAPE_RESERVED) && do_escape == 0) {
120 for (i = 0; i < sizeof(rfc1738_reserved_chars); i++) {
121 if (*src == rfc1738_reserved_chars[i]) {
122 do_escape = 1;
123 break;
124 }
125 }
126 }
127 if ((flags & RFC1738_ESCAPE_CTRLS) && do_escape == 0) {
128 /* RFC 1738 says any control chars (0x00-0x1F) are encoded */
129 if ((unsigned char) *src <= (unsigned char) 0x1F)
130 do_escape = 1;
131 /* RFC 1738 says 0x7f is encoded */
132 else if (*src == (char) 0x7F)
133 do_escape = 1;
134 /* RFC 1738 says any non-US-ASCII are encoded */
135 else if (((unsigned char) *src >= (unsigned char) 0x80))
136 do_escape = 1;
137 }
138 /* Do the triplet encoding, or just copy the char */
139 if (do_escape == 1) {
140 (void) snprintf(dst, (bufsize-(dst-buf)), "%%%02X", (unsigned char) *src);
141 dst += sizeof(char) * 2;
142 } else {
143 *dst = *src;
144 }
145 }
146 *dst = '\0';
147 return (buf);
148 }
149
/*
 * Maps one ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value 0-15. Any other character, including NUL, yields -1.
 */
static int
fromhex(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if ('A' <= ch && ch <= 'F') {
        value = 10 + (ch - 'A');
    }

    return value;
}
164
/*
 * rfc1738_unescape() - Decodes escaped characters (%xy sequences) in the
 * given string, in place.
 *
 *   "%%"  becomes a single '%'.
 *   "%xy" where x and y are hex digits becomes the byte with that value,
 *         except "%00", which is left untouched so that no NUL is
 *         embedded in the result.
 *   A '%' not followed by two hex digits is copied through unchanged.
 *
 * s - NUL-terminated, writable string; must not be NULL. The result is
 *     never longer than the input.
 */
void
rfc1738_unescape(char *s)
{
    size_t i, j;                /* i is write, j is read */

    for (i = j = 0; s[j] != '\0'; i++, j++) {
        int v1, v2, x;

        s[i] = s[j];

        if (s[j] != '%')
            continue;           /* normal case, nothing more to do */

        if (s[j + 1] == '%') {  /* %% case */
            j++;                /* skip the second % */
            continue;
        }

        /*
         * s[j + 2] is only read once s[j + 1] is known to be a hex digit
         * (and therefore not the terminator), so no read passes the NUL.
         */
        v1 = fromhex(s[j + 1]);
        if (v1 < 0)
            continue;           /* non-hex or \0 */
        v2 = fromhex(s[j + 2]);
        if (v2 < 0)
            continue;           /* non-hex or \0 */

        /* Two nibbles can never exceed 255; only %00 needs rejecting. */
        x = (v1 << 4) | v2;
        if (x > 0) {
            s[i] = (char) (unsigned char) x;
            j += 2;
        }
    }
    s[i] = '\0';
}