]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2025 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | #include "squid.h" | |
10 | #include "rfc1738.h" | |
11 | ||
12 | #if HAVE_STRING_H | |
13 | #include <string.h> | |
14 | #endif | |
15 | ||
/*
 * RFC 1738 defines that these characters should be escaped, as well as
 * any non-US-ASCII character or anything between 0x00 - 0x1F.
 *
 * rfc1738_do_escape() consults this table when RFC1738_ESCAPE_UNSAFE is
 * set. Space (0x20) and percent (0x25) are deliberately absent: they are
 * tested directly in rfc1738_do_escape() so that RFC1738_ESCAPE_NOSPACE
 * and RFC1738_ESCAPE_NOPERCENT can switch them off individually.
 *
 * The table is read-only and has no terminator; iterate with sizeof().
 */
static const char rfc1738_unsafe_chars[] = {
    (char) 0x3C,        /* < */
    (char) 0x3E,        /* > */
    (char) 0x22,        /* " */
    (char) 0x23,        /* # */
    (char) 0x7B,        /* { */
    (char) 0x7D,        /* } */
    (char) 0x7C,        /* | */
    (char) 0x5C,        /* \ */
    (char) 0x5E,        /* ^ */
    (char) 0x7E,        /* ~ */
    (char) 0x5B,        /* [ */
    (char) 0x5D,        /* ] */
    (char) 0x60,        /* ` */
    (char) 0x27         /* ' */
};
40 | ||
/*
 * Characters that rfc1738_do_escape() encodes when RFC1738_ESCAPE_RESERVED
 * is set. The table is read-only and has no terminator; iterate with
 * sizeof().
 */
static const char rfc1738_reserved_chars[] = {
    (char) 0x3b,        /* ; */
    (char) 0x2f,        /* / */
    (char) 0x3f,        /* ? */
    (char) 0x3a,        /* : */
    (char) 0x40,        /* @ */
    (char) 0x3d,        /* = */
    (char) 0x26         /* & */
};
50 | ||
51 | /* | |
52 | * rfc1738_escape - Returns a static buffer contains the RFC 1738 | |
53 | * compliant, escaped version of the given url. | |
54 | */ | |
55 | char * | |
56 | rfc1738_do_escape(const char *url, int flags) | |
57 | { | |
58 | static char *buf; | |
59 | static size_t bufsize = 0; | |
60 | const char *src; | |
61 | char *dst; | |
62 | unsigned int i, do_escape; | |
63 | ||
64 | if (buf == NULL || strlen(url) * 3 > bufsize) { | |
65 | xfree(buf); | |
66 | bufsize = strlen(url) * 3 + 1; | |
67 | buf = (char*)xcalloc(bufsize, 1); | |
68 | } | |
69 | for (src = url, dst = buf; *src != '\0' && dst < (buf + bufsize - 1); src++, dst++) { | |
70 | ||
71 | /* a-z, A-Z and 0-9 are SAFE. */ | |
72 | if ((*src >= 'a' && *src <= 'z') || (*src >= 'A' && *src <= 'Z') || (*src >= '0' && *src <= '9')) { | |
73 | *dst = *src; | |
74 | continue; | |
75 | } | |
76 | ||
77 | do_escape = 0; | |
78 | ||
79 | /* RFC 1738 defines these chars as unsafe */ | |
80 | if ((flags & RFC1738_ESCAPE_UNSAFE)) { | |
81 | for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) { | |
82 | if (*src == rfc1738_unsafe_chars[i]) { | |
83 | do_escape = 1; | |
84 | break; | |
85 | } | |
86 | } | |
87 | /* Handle % separately */ | |
88 | if (!(flags & RFC1738_ESCAPE_NOPERCENT) && *src == '%') | |
89 | do_escape = 1; | |
90 | /* Handle space separately */ | |
91 | else if (!(flags & RFC1738_ESCAPE_NOSPACE) && *src <= ' ') | |
92 | do_escape = 1; | |
93 | } | |
94 | /* RFC 1738 defines these chars as reserved */ | |
95 | if ((flags & RFC1738_ESCAPE_RESERVED) && do_escape == 0) { | |
96 | for (i = 0; i < sizeof(rfc1738_reserved_chars); i++) { | |
97 | if (*src == rfc1738_reserved_chars[i]) { | |
98 | do_escape = 1; | |
99 | break; | |
100 | } | |
101 | } | |
102 | } | |
103 | if ((flags & RFC1738_ESCAPE_CTRLS) && do_escape == 0) { | |
104 | /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ | |
105 | if ((unsigned char) *src <= (unsigned char) 0x1F) | |
106 | do_escape = 1; | |
107 | /* RFC 1738 says 0x7f is encoded */ | |
108 | else if (*src == (char) 0x7F) | |
109 | do_escape = 1; | |
110 | /* RFC 1738 says any non-US-ASCII are encoded */ | |
111 | else if (((unsigned char) *src >= (unsigned char) 0x80)) | |
112 | do_escape = 1; | |
113 | } | |
114 | /* Do the triplet encoding, or just copy the char */ | |
115 | if (do_escape == 1) { | |
116 | (void) snprintf(dst, (bufsize-(dst-buf)), "%%%02X", (unsigned char) *src); | |
117 | dst += sizeof(char) * 2; | |
118 | } else { | |
119 | *dst = *src; | |
120 | } | |
121 | } | |
122 | *dst = '\0'; | |
123 | return (buf); | |
124 | } | |
125 | ||
/*
 * Maps a single ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value 0-15. Any other character, including '\0', yields -1.
 */
static int
fromhex(char ch)
{
    int value = -1;

    if ('0' <= ch && ch <= '9') {
        value = ch - '0';
    } else if ('a' <= ch && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if ('A' <= ch && ch <= 'F') {
        value = 10 + (ch - 'A');
    }

    return value;
}
140 | ||
/*
 * rfc1738_unescape() - Converts escaped characters (%xy numbers) in the
 * given string, in place. %% is a %. %ab is the 8-bit hexadecimal number
 * "ab".
 *
 * The decoded text is never longer than the input, so it is written over
 * the start of s and re-terminated there. A '%' that is not followed by
 * two hex digits is copied through unchanged, and so is "%00", which is
 * never decoded into a NUL byte.
 */
void
rfc1738_unescape(char *s)
{
    size_t i, j; /* i is write, j is read */

    for (i = j = 0; s[j] != '\0'; i++, j++) {
        int hi, lo, octet;

        s[i] = s[j];

        if (s[j] != '%')
            continue;           /* normal case, nothing more to do */

        if (s[j + 1] == '%') {  /* %% case */
            j++;                /* Skip the second % */
            continue;
        }

        /* decode; s[j + 2] is only read once s[j + 1] is known not to be \0 */
        hi = fromhex(s[j + 1]);
        if (hi < 0)
            continue;           /* non-hex or \0 */
        lo = fromhex(s[j + 2]);
        if (lo < 0)
            continue;           /* non-hex or \0 */

        octet = (hi << 4) | lo; /* 0..255 by construction */
        if (octet != 0) {
            s[i] = (char) octet;
            j += 2;
        }
    }
    s[i] = '\0';
}
173 |