1 // SPDX-License-Identifier: GPL-2.0+
3 * charset conversion utils
5 * Copyright (c) 2017 Rob Clark
10 #include <capitalization.h>
12 #include <efi_loader.h>
17 * codepage_437 - Unicode to codepage 437 translation table
19 const u16 codepage_437
[128] = CP437
;
21 static struct capitalization_table capitalization_table
[] =
22 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
23 UNICODE_CAPITALIZATION_TABLE
;
24 #elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
25 CP1250_CAPITALIZATION_TABLE
;
27 CP437_CAPITALIZATION_TABLE
;
31 * get_code() - read Unicode code point from UTF-8 stream
33 * @read_u8: - stream reader
34 * @data: - string buffer passed to stream reader, optional
35 * Return: - Unicode code point, or -1
37 static int get_code(u8 (*read_u8
)(void *data
), void *data
)
44 if (ch
>= 0xc2 && ch
<= 0xf4) {
53 if (ch
< 0x80 || ch
> 0xbf)
61 if ((code
>= 0xD800 && code
<= 0xDFFF) ||
65 if (ch
< 0x80 || ch
> 0xbf)
68 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
72 if (ch
< 0x80 || ch
> 0xbf)
76 } else if (ch
>= 0x80) {
85 * read_string() - read byte from character string
87 * @data: - pointer to string
90 * The string pointer is incremented if it does not point to '\0'.
92 static u8
read_string(void *data
)
95 const char **src
= (const char **)data
;
98 if (!src
|| !*src
|| !**src
)
106 * read_console() - read byte from console
108 * @data: - not used, needed to match interface
109 * Return: - byte read or 0 on error
111 static u8
read_console(void *data
)
121 int console_read_unicode(s32
*code
)
127 /* No input available */
131 /* Read Unicode code */
132 c
= get_code(read_console
, NULL
);
140 s32
utf8_get(const char **src
)
142 return get_code(read_string
, src
);
145 int utf8_put(s32 code
, char **dst
)
149 if ((code
>= 0xD800 && code
<= 0xDFFF) || code
>= 0x110000)
151 if (code
<= 0x007F) {
154 if (code
<= 0x07FF) {
155 **dst
= code
>> 6 | 0xC0;
157 if (code
< 0x10000) {
158 **dst
= code
>> 12 | 0xE0;
160 **dst
= code
>> 18 | 0xF0;
162 **dst
= (code
>> 12 & 0x3F) | 0x80;
165 **dst
= (code
>> 6 & 0x3F) | 0x80;
168 **dst
= (code
& 0x3F) | 0x80;
174 size_t utf8_utf16_strnlen(const char *src
, size_t count
)
178 for (; *src
&& count
; --count
) {
179 s32 code
= utf8_get(&src
);
184 /* Reserve space for a replacement character */
186 } else if (code
< 0x10000) {
195 int utf8_utf16_strncpy(u16
**dst
, const char *src
, size_t count
)
197 if (!src
|| !dst
|| !*dst
)
200 for (; count
&& *src
; --count
) {
201 s32 code
= utf8_get(&src
);
205 utf16_put(code
, dst
);
211 s32
utf16_get(const u16
**src
)
221 if (code
>= 0xDC00 && code
<= 0xDFFF)
223 if (code
>= 0xD800 && code
<= 0xDBFF) {
231 if (code2
<= 0xDC00 || code2
>= 0xDFFF)
239 int utf16_put(s32 code
, u16
**dst
)
243 if ((code
>= 0xD800 && code
<= 0xDFFF) || code
>= 0x110000)
245 if (code
< 0x10000) {
249 **dst
= code
>> 10 | 0xD800;
251 **dst
= (code
& 0x3ff) | 0xDC00;
257 size_t utf16_strnlen(const u16
*src
, size_t count
)
261 for (; *src
&& count
; --count
) {
262 s32 code
= utf16_get(&src
);
267 * In case of an illegal sequence still reserve space for a
268 * replacement character.
275 size_t utf16_utf8_strnlen(const u16
*src
, size_t count
)
279 for (; *src
&& count
; --count
) {
280 s32 code
= utf16_get(&src
);
285 /* Reserve space for a replacement character */
287 else if (code
< 0x80)
289 else if (code
< 0x800)
291 else if (code
< 0x10000)
299 int utf16_utf8_strncpy(char **dst
, const u16
*src
, size_t count
)
301 if (!src
|| !dst
|| !*dst
)
304 for (; count
&& *src
; --count
) {
305 s32 code
= utf16_get(&src
);
315 s32
utf_to_lower(const s32 code
)
317 struct capitalization_table
*pos
= capitalization_table
;
321 if (code
>= 'A' && code
<= 'Z')
325 for (; pos
->upper
; ++pos
) {
326 if (pos
->upper
== code
) {
334 s32
utf_to_upper(const s32 code
)
336 struct capitalization_table
*pos
= capitalization_table
;
340 if (code
>= 'a' && code
<= 'z')
344 for (; pos
->lower
; ++pos
) {
345 if (pos
->lower
== code
) {
354 * u16_strcasecmp() - compare two u16 strings case insensitively
356 * @s1: first string to compare
357 * @s2: second string to compare
359 * Return: 0 if s1 and s2 are the same when compared case insensitively
360 * < 0 if the first different u16 in s1 is less than the
361 * corresponding u16 in s2
362 * > 0 if the first different u16 in s1 is greater than the corresponding u16 in s2
364 int u16_strcasecmp(const u16
*s1
, const u16
*s2
)
370 c1
= utf_to_upper(utf16_get(&s1
));
371 c2
= utf_to_upper(utf16_get(&s2
));
373 if (ret
|| !c1
|| c1
== -1 || c2
== -1)
380 * u16_strncmp() - compare two u16 strings
382 * @s1: first string to compare
383 * @s2: second string to compare
384 * @n: maximum number of u16 to compare
385 * Return: 0 if the first n u16 are the same in s1 and s2
386 * < 0 if the first different u16 in s1 is less than the
387 * corresponding u16 in s2
388 * > 0 if the first different u16 in s1 is greater than the
389 * corresponding u16 in s2
391 int u16_strncmp(const u16
*s1
, const u16
*s2
, size_t n
)
395 for (; n
; --n
, ++s1
, ++s2
) {
404 size_t __efi_runtime
u16_strnlen(const u16
*in
, size_t count
)
407 for (i
= 0; count
-- && in
[i
]; i
++);
411 size_t u16_strsize(const void *in
)
413 return (u16_strlen(in
) + 1) * sizeof(u16
);
416 u16
*u16_strcpy(u16
*dest
, const u16
*src
)
420 for (;; dest
++, src
++) {
429 u16
*u16_strdup(const void *src
)
436 len
= u16_strsize(src
);
440 memcpy(new, src
, len
);
445 size_t u16_strlcat(u16
*dest
, const u16
*src
, size_t count
)
447 size_t destlen
= u16_strlen(dest
);
448 size_t srclen
= u16_strlen(src
);
449 size_t ret
= destlen
+ srclen
+ 1;
451 if (destlen
>= count
)
454 srclen
-= ret
- count
;
455 memcpy(&dest
[destlen
], src
, 2 * srclen
);
456 dest
[destlen
+ srclen
] = 0x0000;
461 /* Convert UTF-16 to UTF-8. */
462 uint8_t *utf16_to_utf8(uint8_t *dest
, const uint16_t *src
, size_t size
)
464 uint32_t code_high
= 0;
467 uint32_t code
= *src
++;
470 if (code
>= 0xDC00 && code
<= 0xDFFF) {
471 /* Surrogate pair. */
472 code
= ((code_high
- 0xD800) << 10) + (code
- 0xDC00) + 0x10000;
474 *dest
++ = (code
>> 18) | 0xF0;
475 *dest
++ = ((code
>> 12) & 0x3F) | 0x80;
476 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
477 *dest
++ = (code
& 0x3F) | 0x80;
481 /* *src may be valid. Don't eat it. */
487 if (code
<= 0x007F) {
489 } else if (code
<= 0x07FF) {
490 *dest
++ = (code
>> 6) | 0xC0;
491 *dest
++ = (code
& 0x3F) | 0x80;
492 } else if (code
>= 0xD800 && code
<= 0xDBFF) {
495 } else if (code
>= 0xDC00 && code
<= 0xDFFF) {
498 } else if (code
< 0x10000) {
499 *dest
++ = (code
>> 12) | 0xE0;
500 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
501 *dest
++ = (code
& 0x3F) | 0x80;
503 *dest
++ = (code
>> 18) | 0xF0;
504 *dest
++ = ((code
>> 12) & 0x3F) | 0x80;
505 *dest
++ = ((code
>> 6) & 0x3F) | 0x80;
506 *dest
++ = (code
& 0x3F) | 0x80;
514 int utf_to_cp(s32
*c
, const u16
*codepage
)
519 /* Look up codepage translation */
520 for (j
= 0; j
< 0x80; ++j
) {
521 if (*c
== codepage
[j
]) {
532 int utf8_to_cp437_stream(u8 c
, char *buffer
)
541 end
= buffer
+ strlen(buffer
);
547 ret
= utf_to_cp(&s
, codepage_437
);
556 int utf8_to_utf32_stream(u8 c
, char *buffer
)
564 end
= buffer
+ strlen(buffer
);