]>
Commit | Line | Data |
---|---|---|
dd20a05a KZ |
1 | |
2 | /* | |
455fe9a0 | 3 | * encode.c - string conversion routines (mostly for compatibility with |
dd20a05a KZ |
4 | * udev/volume_id) |
5 | * | |
6 | * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org> | |
7 | * Copyright (C) 2009 Karel Zak <kzak@redhat.com> | |
8 | * | |
9 | * This file may be redistributed under the terms of the | |
10 | * GNU Lesser General Public License. | |
11 | */ | |
12 | #include <stdio.h> | |
13 | #include <stdlib.h> | |
14 | #include <stddef.h> | |
15 | #include <unistd.h> | |
16 | #include <errno.h> | |
17 | #include <string.h> | |
18 | #include <ctype.h> | |
19 | ||
dd20a05a KZ |
20 | #include "blkidP.h" |
21 | ||
/* characters blkid_safe_string() hands to replace_chars() as allowed, in addition to the built-in whitelist */
22 | #define UDEV_ALLOWED_CHARS_INPUT "/ $%?," | |
23 | ||
488e52be KZ |
24 | /** |
25 | * SECTION: encode | |
26 | * @title: Encoding utils | |
27 | * @short_description: encode strings to safe udev-compatible formats | |
28 | * | |
29 | */ | |
30 | ||
dd20a05a KZ |
/*
 * Number of bytes the UTF-8 sequence starting at @str claims to occupy,
 * judged only from its first byte. Returns 1..6, or 0 when the first byte
 * cannot start a sequence (continuation byte, 0xfe or 0xff).
 */
static int utf8_encoded_expected_len(const char *str)
{
	const unsigned char lead = (unsigned char)str[0];
	int nbytes;

	/* 7-bit ASCII is a sequence of its own */
	if (lead < 0x80)
		return 1;

	/*
	 * The lead byte of an n-byte sequence starts with n one-bits followed
	 * by a zero-bit: 110xxxxx, 1110xxxx, ... 1111110x.
	 */
	for (nbytes = 2; nbytes <= 6; nbytes++) {
		const unsigned char mask = (unsigned char)(0xff << (7 - nbytes));
		const unsigned char tag = (unsigned char)(0xff << (8 - nbytes));

		if ((lead & mask) == tag)
			return nbytes;
	}

	return 0;
}
50 | ||
/*
 * Decode the UTF-8 sequence starting at @str into its numeric value.
 * Returns -1 when the first byte cannot start a sequence or when one of the
 * following bytes is not a continuation byte (10xxxxxx).
 */
static int utf8_encoded_to_unichar(const char *str)
{
	/* payload bits kept from the first byte, indexed by sequence length */
	static const int lead_payload[] = { 0, 0, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
	const int nbytes = utf8_encoded_expected_len(str);
	int value;
	int pos;

	if (nbytes == 1)
		return (int)str[0];
	if (nbytes < 2 || nbytes > 6)
		return -1;

	value = (int)str[0] & lead_payload[nbytes];

	/* each continuation byte contributes its low six bits */
	for (pos = 1; pos < nbytes; pos++) {
		const int byte = (int)str[pos];

		if ((byte & 0xc0) != 0x80)
			return -1;
		value = (value << 6) | (byte & 0x3f);
	}

	return value;
}
90 | ||
/*
 * Number of bytes (1..6) needed to encode @unichar; values below 0x80,
 * including negative ones, yield 1.
 */
static int utf8_unichar_to_encoded_len(int unichar)
{
	/* first value that no longer fits into 1, 2, 3, 4 and 5 bytes */
	static const int too_big_for[] = {
		0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	const int nlimits = (int)(sizeof(too_big_for) / sizeof(too_big_for[0]));
	int idx;

	for (idx = 0; idx < nlimits; idx++) {
		if (unichar < too_big_for[idx])
			return idx + 1;
	}

	return 6;
}
106 | ||
/*
 * Returns 1 when @unichar lies in an accepted numeric range, 0 otherwise.
 * Rejected: anything above 0x10ffff, 0xd800..0xdfff, 0xfdd0..0xfdef and
 * every value whose low 16 bits are 0xffff.
 */
static int utf8_unichar_valid_range(int unichar)
{
	const int above_max = unichar > 0x10ffff;
	const int in_d800_block = (unichar & 0xfffff800) == 0xd800;
	const int in_fdd0_block = unichar >= 0xfdd0 && unichar <= 0xfdef;
	const int low_word_ffff = (unichar & 0xffff) == 0xffff;

	return !(above_max || in_d800_block || in_fdd0_block || low_word_ffff);
}
120 | ||
/*
 * Validate the UTF-8 sequence starting at @str.
 * Returns its length in bytes (1 for ASCII) or -1 when it is not valid.
 */
static int utf8_encoded_valid_unichar(const char *str)
{
	const int expected = utf8_encoded_expected_len(str);
	int value;
	int pos = 0;

	switch (expected) {
	case 0:
		/* first byte cannot start a sequence */
		return -1;
	case 1:
		/* ascii is valid */
		return 1;
	default:
		break;
	}

	/*
	 * All announced bytes must have the high bit set; a NUL terminator
	 * inside the sequence is caught here, before anything is decoded.
	 */
	while (pos < expected) {
		if (!(str[pos] & 0x80))
			return -1;
		pos++;
	}

	value = utf8_encoded_to_unichar(str);

	/* the value must need exactly this many bytes and be in a valid range */
	if (utf8_unichar_to_encoded_len(value) != expected ||
	    !utf8_unichar_valid_range(value))
		return -1;

	return expected;
}
153 | ||
/*
 * Copy @str to @to with leading and trailing whitespace removed and every
 * inner run of whitespace collapsed into a single '_'.
 *
 * @str: input string; at most @len bytes are examined (fewer if a NUL
 *       terminator comes first)
 * @to:  output buffer; up to @len characters plus the terminating NUL are
 *       stored, so it must provide room for @len + 1 bytes
 * @len: maximum number of input bytes to look at
 *
 * Returns: always 0.
 */
static int replace_whitespace(const char *str, char *to, size_t len)
{
	/*
	 * The <ctype.h> classifiers are only defined for values representable
	 * as unsigned char (or EOF); reading the input through this pointer
	 * keeps bytes >= 0x80 from reaching isspace() as negative numbers.
	 */
	const unsigned char *in = (const unsigned char *)str;
	size_t i, j;

	/* strip trailing whitespace */
	len = strnlen(str, len);
	while (len && isspace(in[len - 1]))
		len--;

	/* strip leading whitespace */
	i = 0;
	while ((i < len) && isspace(in[i]))
		i++;

	j = 0;
	while (i < len) {
		/* substitute multiple whitespace with a single '_' */
		if (isspace(in[i])) {
			/*
			 * No bound check needed: after the trailing strip the
			 * byte at len - 1 is not whitespace, so the run ends
			 * before len.
			 */
			while (isspace(in[i]))
				i++;
			to[j++] = '_';
		}
		to[j++] = str[i++];
	}
	to[j] = '\0';
	return 0;
}
181 | ||
/*
 * Returns 1 when @c is an ASCII digit or letter, one of "#+-.:=@_", or is
 * found in the optional caller-supplied set @white (may be NULL); 0
 * otherwise. The lookups use strchr(), so a NUL byte matches the terminator
 * of the built-in set and is reported as whitelisted.
 */
static int is_whitelisted(char c, const char *white)
{
	static const char always_allowed[] = "#+-.:=@_";

	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	if (strchr(always_allowed, c))
		return 1;
	if (white && strchr(white, c))
		return 1;

	return 0;
}
192 | ||
/*
 * Sanitize @str in place: whitelisted characters, "\x" escape prefixes and
 * valid multi-byte UTF-8 sequences are kept; everything else is replaced.
 *
 * @str:   NUL-terminated string, modified in place (its length never changes)
 * @white: optional set of additional allowed characters, may be NULL
 *
 * Whitespace becomes an ordinary space when @white contains ' ', any other
 * rejected byte becomes '_'.
 *
 * Returns: number of bytes that were replaced.
 */
static int replace_chars(char *str, const char *white)
{
	/* loop-invariant: does the caller allow plain spaces? */
	const int space_allowed = white != NULL && strchr(white, ' ') != NULL;
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		if (is_whitelisted(str[i], white)) {
			i++;
			continue;
		}

		/*
		 * accept hex encoding; only the two-byte "\x" prefix is skipped
		 * here, the digits after it go through the loop like any other
		 * character
		 */
		if (str[i] == '\\' && str[i+1] == 'x') {
			i += 2;
			continue;
		}

		/* accept valid utf8 */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/*
		 * if space is allowed, replace whitespace with ordinary space;
		 * str[i] may be a stray byte >= 0x80 at this point, so it has to
		 * be converted to unsigned char before it reaches isspace()
		 */
		if (space_allowed && isspace((unsigned char)str[i])) {
			str[i] = ' ';
			i++;
			replaced++;
			continue;
		}

		/* everything else is replaced with '_' */
		str[i] = '_';
		i++;
		replaced++;
	}
	return replaced;
}
235 | ||
/**
 * blkid_encode_string:
 * @str: input string to be encoded
 * @str_enc: output string to store the encoded input string
 * @len: maximum size of the output string, which may be
 *       four times as long as the input string
 *
 * Encode all potentially unsafe characters of a string to the
 * corresponding hex value prefixed by '\x'. Valid multi-byte UTF-8
 * sequences and whitelisted ASCII characters are copied unchanged; the
 * backslash itself is always escaped.
 *
 * On failure the content of @str_enc is unspecified and not necessarily
 * NUL-terminated.
 *
 * Returns: 0 if the entire string was copied, non-zero otherwise.
 **/
int blkid_encode_string(const char *str, char *str_enc, size_t len)
{
	static const char hexdigits[] = "0123456789abcdef";
	size_t i, j;

	if (!str || !str_enc || !len)
		return -1;

	for (i = 0, j = 0; str[i] != '\0'; i++) {
		int seqlen;

		seqlen = utf8_encoded_valid_unichar(&str[i]);
		if (seqlen > 1) {
			/* valid multi-byte UTF-8 is copied verbatim */
			if (len-j < (size_t)seqlen)
				goto err;
			memcpy(&str_enc[j], &str[i], seqlen);
			j += seqlen;
			i += (seqlen-1);
		} else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
			const unsigned char byte = (unsigned char) str[i];

			if (len-j < 4)
				goto err;
			/*
			 * Emit "\xHH" one character at a time. A printf-style
			 * call would append a NUL as a fifth byte, which the
			 * check above does not account for.
			 */
			str_enc[j++] = '\\';
			str_enc[j++] = 'x';
			str_enc[j++] = hexdigits[byte >> 4];
			str_enc[j++] = hexdigits[byte & 0x0f];
		} else {
			if (len-j < 1)
				goto err;
			str_enc[j] = str[i];
			j++;
		}
		/*
		 * Headroom check kept from the original implementation: give
		 * up unless more than three bytes are left after this
		 * character, even if the remaining input would need less.
		 */
		if (j+3 >= len)
			goto err;
	}
	if (len-j < 1)
		goto err;
	str_enc[j] = '\0';
	return 0;
err:
	return -1;
}
286 | ||
287 | /** | |
288 | * blkid_safe_string: | |
289 | * @str: input string | |
290 | * @str_safe: output string | |
291 | * @len: size of output string | |
292 | * | |
293 | * Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces | |
294 | * with '_'. | |
488e52be KZ |
295 | * |
296 | * Returns: 0 on success or -1 in case of error. | |
dd20a05a KZ |
297 | */ |
298 | int blkid_safe_string(const char *str, char *str_safe, size_t len) | |
299 | { | |
e3436956 KZ |
300 | if (!str || !str_safe || !len) |
301 | return -1; | |
17d6fe2e | 302 | replace_whitespace(str, str_safe, len); |
dd20a05a KZ |
303 | replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT); |
304 | return 0; | |
305 | } |