]>
git.ipfire.org Git - thirdparty/util-linux.git/blob - libblkid/src/encode.c
3 * encode.c - string conversion routines (mostly for compatibility with
6 * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org>
7 * Copyright (C) 2009 Karel Zak <kzak@redhat.com>
9 * This file may be redistributed under the terms of the
10 * GNU Lesser General Public License.
22 #define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
26 * @title: Encoding utils
27 * @short_description: encode strings to safe udev-compatible formats
31 /* count of characters used to encode one unicode char */
32 static int utf8_encoded_expected_len(const char *str
)
34 unsigned char c
= (unsigned char)str
[0];
38 if ((c
& 0xe0) == 0xc0)
40 if ((c
& 0xf0) == 0xe0)
42 if ((c
& 0xf8) == 0xf0)
44 if ((c
& 0xfc) == 0xf8)
46 if ((c
& 0xfe) == 0xfc)
51 /* decode one unicode char */
52 static int utf8_encoded_to_unichar(const char *str
)
58 len
= utf8_encoded_expected_len(str
);
63 unichar
= str
[0] & 0x1f;
66 unichar
= (int)str
[0] & 0x0f;
69 unichar
= (int)str
[0] & 0x07;
72 unichar
= (int)str
[0] & 0x03;
75 unichar
= (int)str
[0] & 0x01;
81 for (i
= 1; i
< len
; i
++) {
82 if (((int)str
[i
] & 0xc0) != 0x80)
85 unichar
|= (int)str
[i
] & 0x3f;
91 /* expected size used to encode one unicode char */
92 static int utf8_unichar_to_encoded_len(int unichar
)
98 if (unichar
< 0x10000)
100 if (unichar
< 0x200000)
102 if (unichar
< 0x4000000)
107 /* check if unicode char has a valid numeric range */
108 static int utf8_unichar_valid_range(int unichar
)
110 if (unichar
> 0x10ffff)
112 if ((unichar
& 0xfffff800) == 0xd800)
114 if ((unichar
> 0xfdcf) && (unichar
< 0xfdf0))
116 if ((unichar
& 0xffff) == 0xffff)
121 /* validate one encoded unicode char and return its length */
122 static int utf8_encoded_valid_unichar(const char *str
)
128 len
= utf8_encoded_expected_len(str
);
136 /* check if expected encoded chars are available */
137 for (i
= 0; i
< len
; i
++)
138 if ((str
[i
] & 0x80) != 0x80)
141 unichar
= utf8_encoded_to_unichar(str
);
143 /* check if encoded length matches encoded value */
144 if (utf8_unichar_to_encoded_len(unichar
) != len
)
147 /* check if value has valid range */
148 if (!utf8_unichar_valid_range(unichar
))
154 static int replace_whitespace(const char *str
, char *to
, size_t len
)
158 /* strip trailing whitespace */
159 len
= strnlen(str
, len
);
160 while (len
&& isspace(str
[len
-1]))
163 /* strip leading whitespace */
165 while ((i
< len
) && isspace(str
[i
]))
170 /* substitute multiple whitespace with a single '_' */
171 if (isspace(str
[i
])) {
172 while (isspace(str
[i
]))
182 static int is_whitelisted(char c
, const char *white
)
184 if ((c
>= '0' && c
<= '9') ||
185 (c
>= 'A' && c
<= 'Z') ||
186 (c
>= 'a' && c
<= 'z') ||
187 strchr("#+-.:=@_", c
) != NULL
||
188 (white
!= NULL
&& strchr(white
, c
) != NULL
))
193 /* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
194 static int replace_chars(char *str
, const char *white
)
199 while (str
[i
] != '\0') {
202 if (is_whitelisted(str
[i
], white
)) {
207 /* accept hex encoding */
208 if (str
[i
] == '\\' && str
[i
+1] == 'x') {
213 /* accept valid utf8 */
214 len
= utf8_encoded_valid_unichar(&str
[i
]);
220 /* if space is allowed, replace whitespace with ordinary space */
221 if (isspace(str
[i
]) && white
!= NULL
&& strchr(white
, ' ') != NULL
) {
228 /* everything else is replaced with '_' */
237 * blkid_encode_string:
238 * @str: input string to be encoded
239 * @str_enc: output string to store the encoded input string
240 * @len: maximum size of the output string, which may be
241 * four times as long as the input string
243 * Encode all potentially unsafe characters of a string to the
244 * corresponding hex value prefixed by '\x'.
246 * Returns: 0 if the entire string was copied, non-zero otherwise.
248 int blkid_encode_string(const char *str
, char *str_enc
, size_t len
)
252 if (!str
|| !str_enc
|| !len
)
255 for (i
= 0, j
= 0; str
[i
] != '\0'; i
++) {
258 seqlen
= utf8_encoded_valid_unichar(&str
[i
]);
260 if (len
-j
< (size_t)seqlen
)
262 memcpy(&str_enc
[j
], &str
[i
], seqlen
);
265 } else if (str
[i
] == '\\' || !is_whitelisted(str
[i
], NULL
)) {
268 sprintf(&str_enc
[j
], "\\x%02x", (unsigned char) str
[i
]);
290 * @str_safe: output string
291 * @len: size of output string
293 * Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces
296 * Returns: 0 on success or -1 in case of error.
298 int blkid_safe_string(const char *str
, char *str_safe
, size_t len
)
300 if (!str
|| !str_safe
|| !len
)
302 replace_whitespace(str
, str_safe
, len
);
303 replace_chars(str_safe
, UDEV_ALLOWED_CHARS_INPUT
);