]> git.ipfire.org Git - thirdparty/util-linux.git/blob - libblkid/src/encode.c
Merge branch 'eject-sparc' of https://github.com/mator/util-linux
[thirdparty/util-linux.git] / libblkid / src / encode.c
1
2 /*
3 * encode.c - string conversion routines (mostly for compatibility with
4 * udev/volume_id)
5 *
6 * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org>
7 * Copyright (C) 2009 Karel Zak <kzak@redhat.com>
8 *
9 * This file may be redistributed under the terms of the
10 * GNU Lesser General Public License.
11 */
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <stddef.h>
15 #include <unistd.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <ctype.h>
19
20 #include "blkidP.h"
21
22 #define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
23
24 /**
25 * SECTION: encode
26 * @title: Encoding utils
27 * @short_description: encode strings to safe udev-compatible formats
28 *
29 */
30
/*
 * Number of bytes a UTF-8 sequence is expected to occupy, judged only by
 * its first byte.  Returns 1 for plain ASCII (including NUL), 2..6 for a
 * multi-byte lead byte, and 0 if the byte cannot start a sequence.
 */
static int utf8_encoded_expected_len(const char *str)
{
	/* lead-byte patterns for 2..6 byte sequences: (byte & mask) == bits */
	static const unsigned char lead_mask[] = { 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
	static const unsigned char lead_bits[] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
	const unsigned char first = (unsigned char) *str;
	int idx;

	if (first < 0x80)
		return 1;

	for (idx = 0; idx < 5; idx++) {
		if ((first & lead_mask[idx]) == lead_bits[idx])
			return idx + 2;
	}

	/* continuation byte (10xxxxxx) or 0xfe/0xff */
	return 0;
}
50
/*
 * Decode the UTF-8 sequence starting at @str into its numeric value.
 * Returns -1 if the first byte is not a valid lead byte or if any of the
 * following bytes is not a continuation byte (10xxxxxx).
 */
static int utf8_encoded_to_unichar(const char *str)
{
	/* payload bits carried by the lead byte, indexed by sequence length */
	static const int lead_payload[] = { 0, 0, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
	const int nbytes = utf8_encoded_expected_len(str);
	int value;
	int pos;

	if (nbytes < 1 || nbytes > 6)
		return -1;

	/* single byte: the byte is the value */
	if (nbytes == 1)
		return (int) str[0];

	value = (int) str[0] & lead_payload[nbytes];

	/* every continuation byte contributes its low six bits */
	for (pos = 1; pos < nbytes; pos++) {
		const int byte = (int) str[pos];

		if ((byte & 0xc0) != 0x80)
			return -1;
		value = (value << 6) | (byte & 0x3f);
	}

	return value;
}
90
/*
 * Number of bytes needed to encode @unichar, found by comparing it
 * against the upper limit of each sequence length (1..5 bytes); anything
 * at or above the last limit needs 6 bytes.
 */
static int utf8_unichar_to_encoded_len(int unichar)
{
	/* first value that no longer fits into (index + 1) bytes */
	static const int limit[] = {
		0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	int n;

	for (n = 0; n < 5; n++) {
		if (unichar < limit[n])
			return n + 1;
	}
	return 6;
}
106
/*
 * Range check for a decoded value.  Returns 1 if @unichar is acceptable
 * and 0 if it falls into one of the rejected ranges below.
 */
static int utf8_unichar_valid_range(int unichar)
{
	/* above the last Unicode code point */
	const int beyond_max = unichar > 0x10ffff;
	/* U+D800..U+DFFF (UTF-16 surrogates) */
	const int surrogate = (unichar & 0xfffff800) == 0xd800;
	/* U+FDD0..U+FDEF */
	const int fdd0_block = unichar >= 0xfdd0 && unichar <= 0xfdef;
	/* low 16 bits all set (U+FFFF, U+1FFFF, ...); also catches -1 */
	const int ends_ffff = (unichar & 0xffff) == 0xffff;

	return !(beyond_max || surrogate || fdd0_block || ends_ffff);
}
120
/*
 * Validate the UTF-8 sequence starting at @str.
 *
 * Returns the sequence length in bytes (1 for ASCII) when it is well
 * formed, or -1 when the lead byte is invalid, the string ends early, the
 * value was encoded with more bytes than necessary, or the value is
 * outside the accepted range.
 */
static int utf8_encoded_valid_unichar(const char *str)
{
	const int nbytes = utf8_encoded_expected_len(str);
	int codepoint;
	int pos = 0;

	switch (nbytes) {
	case 0:
		/* not a lead byte */
		return -1;
	case 1:
		/* ascii is always fine */
		return 1;
	default:
		break;
	}

	/*
	 * All bytes of a multi-byte sequence have the top bit set; a NUL
	 * (or any ASCII byte) inside the expected span means truncation.
	 */
	while (pos < nbytes) {
		if (!(str[pos] & 0x80))
			return -1;
		pos++;
	}

	codepoint = utf8_encoded_to_unichar(str);

	/* reject overlong encodings (and decode failures) */
	if (utf8_unichar_to_encoded_len(codepoint) != nbytes)
		return -1;

	/* reject values outside the accepted range */
	if (!utf8_unichar_valid_range(codepoint))
		return -1;

	return nbytes;
}
153
/*
 * Copy @str to @to with leading and trailing whitespace removed and every
 * internal run of whitespace collapsed to a single '_'.
 *
 * @str: input; at most @len bytes of it are examined
 * @to:  output buffer
 * @len: size of @to in bytes, including room for the terminating NUL
 *
 * The result is always NUL-terminated within @len bytes.  If the
 * normalized text does not fit it is truncated, never ending in a
 * dangling '_'.
 *
 * Returns: 0 on success, -1 if @len is zero (nothing is written).
 */
static int replace_whitespace(const char *str, char *to, size_t len)
{
	/* <ctype.h> functions require values in the unsigned char range */
	const unsigned char *in = (const unsigned char *) str;
	size_t end = 0;
	size_t max;
	size_t i = 0;
	size_t j = 0;

	if (len == 0)
		return -1;

	/* keep one byte of the output for the terminator */
	max = len - 1;

	/* bounded length of the input (same result as strnlen(str, len)) */
	while (end < len && in[end] != '\0')
		end++;

	/* strip trailing whitespace */
	while (end && isspace(in[end - 1]))
		end--;

	/* strip leading whitespace */
	while (i < end && isspace(in[i]))
		i++;

	while (i < end && j < max) {
		/* substitute multiple whitespace with a single '_' */
		if (isspace(in[i])) {
			/* terminates: in[end - 1] is known not to be whitespace */
			while (isspace(in[i]))
				i++;
			/* need room for '_' and the character following it */
			if (max - j < 2)
				break;
			to[j++] = '_';
		}
		to[j++] = (char) in[i++];
	}
	to[j] = '\0';
	return 0;
}
181
/*
 * Returns 1 if @c is an ASCII letter or digit, one of "#+-.:=@_", or a
 * character contained in the optional extra set @white (may be NULL);
 * returns 0 otherwise.
 */
static int is_whitelisted(char c, const char *white)
{
	/*
	 * strchr() treats the terminating NUL as part of the string, so a
	 * NUL byte would otherwise "match" every set.
	 */
	if (c == '\0')
		return 0;

	if ((c >= '0' && c <= '9') ||
	    (c >= 'A' && c <= 'Z') ||
	    (c >= 'a' && c <= 'z') ||
	    strchr("#+-.:=@_", c) != NULL ||
	    (white != NULL && strchr(white, c) != NULL))
		return 1;
	return 0;
}
192
/*
 * Sanitize @str in place: whitelisted characters (see is_whitelisted(),
 * extended by @white), "\x" hex-escape prefixes and valid multi-byte
 * UTF-8 sequences are kept.  Any other whitespace byte becomes ' ' when
 * @white contains a space; everything else becomes '_'.
 *
 * Returns: the number of bytes that were replaced.
 */
static int replace_chars(char *str, const char *white)
{
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		if (is_whitelisted(str[i], white)) {
			i++;
			continue;
		}

		/* accept hex encoding */
		if (str[i] == '\\' && str[i+1] == 'x') {
			i += 2;
			continue;
		}

		/* accept valid utf8 */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/*
		 * If space is allowed, replace whitespace with ordinary space.
		 * Bytes reaching this point may have the top bit set (invalid
		 * UTF-8), so convert to unsigned char before calling isspace().
		 */
		if (isspace((unsigned char) str[i]) &&
		    white != NULL && strchr(white, ' ') != NULL) {
			str[i] = ' ';
			i++;
			replaced++;
			continue;
		}

		/* everything else is replaced with '_' */
		str[i] = '_';
		i++;
		replaced++;
	}
	return replaced;
}
235
/**
 * blkid_encode_string:
 * @str: input string to be encoded
 * @str_enc: output string to store the encoded input string
 * @len: maximum size of the output string, which may be
 *       four times as long as the input string
 *
 * Encode all potentially unsafe characters of a string to the
 * corresponding hex value prefixed by '\x'. Valid multi-byte UTF-8
 * sequences and whitelisted ASCII characters are copied unchanged;
 * a backslash is always encoded.
 *
 * The function is conservative about space: after every input character
 * it fails unless at least four bytes of @str_enc are still unused.
 * On failure the content of @str_enc is unspecified and may not be
 * NUL-terminated.
 *
 * Returns: 0 if the entire string was copied, non-zero otherwise.
 **/
int blkid_encode_string(const char *str, char *str_enc, size_t len)
{
	size_t i, j;

	if (!str || !str_enc || !len)
		return -1;

	for (i = 0, j = 0; str[i] != '\0'; i++) {
		int seqlen;

		seqlen = utf8_encoded_valid_unichar(&str[i]);
		if (seqlen > 1) {
			/* valid multi-byte sequence: copy as is */
			if (len-j < (size_t)seqlen)
				goto err;
			memcpy(&str_enc[j], &str[i], seqlen);
			j += seqlen;
			i += (seqlen-1);
		} else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
			/*
			 * "\xHH" is four characters, but the formatter also
			 * writes a terminating NUL, so five bytes must be
			 * available to stay inside the buffer.
			 */
			if (len-j < 5)
				goto err;
			snprintf(&str_enc[j], len-j, "\\x%02x",
				 (unsigned char) str[i]);
			j += 4;
		} else {
			if (len-j < 1)
				goto err;
			str_enc[j] = str[i];
			j++;
		}
		/* keep headroom for the next (worst case) escape */
		if (j+3 >= len)
			goto err;
	}
	if (len-j < 1)
		goto err;
	str_enc[j] = '\0';
	return 0;
err:
	return -1;
}
286
287 /**
288 * blkid_safe_string:
289 * @str: input string
290 * @str_safe: output string
291 * @len: size of output string
292 *
293 * Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces
294 * with '_'.
295 *
296 * Returns: 0 on success or -1 in case of error.
297 */
298 int blkid_safe_string(const char *str, char *str_safe, size_t len)
299 {
300 if (!str || !str_safe || !len)
301 return -1;
302 replace_whitespace(str, str_safe, len);
303 replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT);
304 return 0;
305 }