1 // SPDX-License-Identifier: GPL-2.0+
3 * charset conversion utils
5 * Copyright (c) 2017 Rob Clark
10 #include <capitalization.h>
13 static struct capitalization_table capitalization_table
[] =
14 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
15 UNICODE_CAPITALIZATION_TABLE
;
16 #elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
17 CP1250_CAPITALIZATION_TABLE
;
19 CP437_CAPITALIZATION_TABLE
;
23 * get_code() - read Unicode code point from UTF-8 stream
25 * @read_u8: - stream reader
26 * @data: - string buffer passed to stream reader, optional
27 * Return: - Unicode code point
29 static int get_code(u8 (*read_u8
)(void *data
), void *data
)
36 if (ch
>= 0xc2 && ch
<= 0xf4) {
45 if (ch
< 0x80 || ch
> 0xbf)
53 if ((code
>= 0xD800 && code
<= 0xDFFF) ||
57 if (ch
< 0x80 || ch
> 0xbf)
60 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
64 if (ch
< 0x80 || ch
> 0xbf)
68 } else if (ch
>= 0x80) {
77 * read_string() - read byte from character string
79 * @data: - pointer to string
82 * The string pointer is incremented if it does not point to '\0'.
84 static u8
read_string(void *data
)
87 const char **src
= (const char **)data
;
90 if (!src
|| !*src
|| !**src
)
98 * read_console() - read byte from console
100 * @data: - not used, needed to match interface
101 * Return: - byte read or 0 on error
103 static u8
read_console(void *data
)
113 int console_read_unicode(s32
*code
)
116 /* No input available */
120 /* Read Unicode code */
121 *code
= get_code(read_console
, NULL
);
125 s32
utf8_get(const char **src
)
127 return get_code(read_string
, src
);
130 int utf8_put(s32 code
, char **dst
)
134 if ((code
>= 0xD800 && code
<= 0xDFFF) || code
>= 0x110000)
136 if (code
<= 0x007F) {
139 if (code
<= 0x07FF) {
140 **dst
= code
>> 6 | 0xC0;
142 if (code
< 0x10000) {
143 **dst
= code
>> 12 | 0xE0;
145 **dst
= code
>> 18 | 0xF0;
147 **dst
= (code
>> 12 & 0x3F) | 0x80;
150 **dst
= (code
>> 6 & 0x3F) | 0x80;
153 **dst
= (code
& 0x3F) | 0x80;
159 size_t utf8_utf16_strnlen(const char *src
, size_t count
)
163 for (; *src
&& count
; --count
) {
164 s32 code
= utf8_get(&src
);
169 /* Reserve space for a replacement character */
171 } else if (code
< 0x10000) {
180 int utf8_utf16_strncpy(u16
**dst
, const char *src
, size_t count
)
182 if (!src
|| !dst
|| !*dst
)
185 for (; count
&& *src
; --count
) {
186 s32 code
= utf8_get(&src
);
190 utf16_put(code
, dst
);
196 s32
utf16_get(const u16
**src
)
206 if (code
>= 0xDC00 && code
<= 0xDFFF)
208 if (code
>= 0xD800 && code
<= 0xDBFF) {
/*
 * The unit following a high surrogate must be a low surrogate, i.e. lie in
 * 0xDC00..0xDFFF inclusive. Only values outside that range are invalid, so
 * the bounds are compared with strict < and >: 0xDC00 and 0xDFFF themselves
 * are legal low surrogates.
 */
216 if (code2
< 0xDC00 || code2
> 0xDFFF)
224 int utf16_put(s32 code
, u16
**dst
)
228 if ((code
>= 0xD800 && code
<= 0xDFFF) || code
>= 0x110000)
230 if (code
< 0x10000) {
234 **dst
= code
>> 10 | 0xD800;
236 **dst
= (code
& 0x3ff) | 0xDC00;
242 size_t utf16_strnlen(const u16
*src
, size_t count
)
246 for (; *src
&& count
; --count
) {
247 s32 code
= utf16_get(&src
);
252 * In case of an illegal sequence still reserve space for a
253 * replacement character.
260 size_t utf16_utf8_strnlen(const u16
*src
, size_t count
)
264 for (; *src
&& count
; --count
) {
265 s32 code
= utf16_get(&src
);
270 /* Reserve space for a replacement character */
272 else if (code
< 0x80)
274 else if (code
< 0x800)
276 else if (code
< 0x10000)
284 int utf16_utf8_strncpy(char **dst
, const u16
*src
, size_t count
)
286 if (!src
|| !dst
|| !*dst
)
289 for (; count
&& *src
; --count
) {
290 s32 code
= utf16_get(&src
);
300 s32
utf_to_lower(const s32 code
)
302 struct capitalization_table
*pos
= capitalization_table
;
306 if (code
>= 'A' && code
<= 'Z')
310 for (; pos
->upper
; ++pos
) {
311 if (pos
->upper
== code
) {
319 s32
utf_to_upper(const s32 code
)
321 struct capitalization_table
*pos
= capitalization_table
;
325 if (code
>= 'a' && code
<= 'z')
329 for (; pos
->lower
; ++pos
) {
330 if (pos
->lower
== code
) {
339 * u16_strncmp() - compare two u16 strings
341 * @s1: first string to compare
342 * @s2: second string to compare
343 * @n: maximum number of u16 to compare
344 * Return: 0 if the first n u16 are the same in s1 and s2
345 * < 0 if the first different u16 in s1 is less than the
346 * corresponding u16 in s2
347 * > 0 if the first different u16 in s1 is greater than the
348 * corresponding u16 in s2
350 int u16_strncmp(const u16
*s1
, const u16
*s2
, size_t n
)
354 for (; n
; --n
, ++s1
, ++s2
) {
363 size_t u16_strlen(const void *in
)
365 const char *pos
= in
;
368 for (; pos
[0] || pos
[1]; pos
+= 2)
370 ret
= pos
- (char *)in
;
375 size_t u16_strnlen(const u16
*in
, size_t count
)
378 for (i
= 0; count
-- && in
[i
]; i
++);
382 u16
*u16_strcpy(u16
*dest
, const u16
*src
)
386 for (;; dest
++, src
++) {
395 u16
*u16_strdup(const void *src
)
402 len
= (u16_strlen(src
) + 1) * sizeof(u16
);
406 memcpy(new, src
, len
);
411 /* Convert UTF-16 to UTF-8. */
412 uint8_t *utf16_to_utf8(uint8_t *dest
, const uint16_t *src
, size_t size
)
414 uint32_t code_high
= 0;
417 uint32_t code
= *src
++;
420 if (code
>= 0xDC00 && code
<= 0xDFFF) {
421 /* Surrogate pair. */
422 code
= ((code_high
- 0xD800) << 10) + (code
- 0xDC00) + 0x10000;
424 *dest
++ = (code
>> 18) | 0xF0;
425 *dest
++ = ((code
>> 12) & 0x3F) | 0x80;
426 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
427 *dest
++ = (code
& 0x3F) | 0x80;
431 /* *src may be valid. Don't eat it. */
437 if (code
<= 0x007F) {
439 } else if (code
<= 0x07FF) {
440 *dest
++ = (code
>> 6) | 0xC0;
441 *dest
++ = (code
& 0x3F) | 0x80;
442 } else if (code
>= 0xD800 && code
<= 0xDBFF) {
445 } else if (code
>= 0xDC00 && code
<= 0xDFFF) {
448 } else if (code
< 0x10000) {
449 *dest
++ = (code
>> 12) | 0xE0;
450 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
451 *dest
++ = (code
& 0x3F) | 0x80;
453 *dest
++ = (code
>> 18) | 0xF0;
454 *dest
++ = ((code
>> 12) & 0x3F) | 0x80;
455 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
456 *dest
++ = (code
& 0x3F) | 0x80;