]>
Commit | Line | Data |
---|---|---|
f739fcd8 | 1 | /* SPDX-License-Identifier: GPL-2.0+ */ |
78178bb0 RC |
2 | /* |
3 | * charset conversion utils | |
4 | * | |
5 | * Copyright (c) 2017 Rob Clark | |
78178bb0 RC |
6 | */ |
7 | ||
8 | #ifndef __CHARSET_H_ | |
9 | #define __CHARSET_H_ | |
10 | ||
d8c28232 | 11 | #include <linux/kernel.h> |
f58c5ecb HS |
12 | #include <linux/types.h> |
13 | ||
984f251f | 14 | #define MAX_UTF8_PER_UTF16 3 |
78178bb0 | 15 | |
35cbb796 HS |
16 | /** |
17 | * console_read_unicode() - read Unicode code point from console | |
18 | * | |
19 | * @code: pointer to store Unicode code point | |
20 | * Return: 0 = success | |
21 | */ | |
22 | int console_read_unicode(s32 *code); | |
23 | ||
d8c28232 HS |
24 | /** |
25 | * utf8_get() - get next UTF-8 code point from buffer | |
26 | * | |
27 | * @src: pointer to current byte, updated to point to next byte | |
28 | * Return: code point, or 0 for end of string, or -1 if no legal | |
29 | * code point is found. In case of an error src points to | |
30 | * the incorrect byte. | |
31 | */ | |
32 | s32 utf8_get(const char **src); | |
33 | ||
34 | /** | |
35 | * utf8_put() - write UTF-8 code point to buffer | |
36 | * | |
37 | * @code: code point | |
38 | * @dst: pointer to destination buffer, updated to next position | |
39 | * Return: -1 if the input parameters are invalid | |
40 | */ | |
41 | int utf8_put(s32 code, char **dst); | |
42 | ||
43 | /** | |
44 | * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion | |
45 | * to utf-16 | |
46 | * | |
47 | * @src: utf-8 string | |
48 | * @count: maximum number of code points to convert | |
49 | * Return: length in bytes after conversion to utf-16 without the | |
50 | * trailing \0. If an invalid UTF-8 sequence is hit one | |
51 | * word will be reserved for a replacement character. | |
52 | */ | |
53 | size_t utf8_utf16_strnlen(const char *src, size_t count); | |
54 | ||
55 | /** | |
56 | * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 | |
57 | * | |
58 | * @src: utf-8 string | |
59 | * Return: length in bytes after conversion to utf-16 without the | |
60 | * trailing \0. Invalid UTF-8 is handled as in utf8_utf16_strnlen(). | |
61 | */ | |
62 | #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) | |
63 | ||
64 | /** | |
65 | * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string | |
66 | * | |
67 | * @dst: destination buffer | |
68 | * @src: source buffer | |
69 | * @count: maximum number of code points to copy | |
70 | * Return: -1 if the input parameters are invalid | |
71 | */ | |
72 | int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); | |
73 | ||
74 | /** | |
75 | * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string | |
76 | * | |
77 | * @dst: destination buffer | |
78 | * @src: source buffer | |
79 | * Return: -1 if the input parameters are invalid | |
80 | */ | |
81 | #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) | |
82 | ||
83 | /** | |
84 | * utf16_get() - get next UTF-16 code point from buffer | |
85 | * | |
86 | * @src: pointer to current word, updated to point to next word | |
87 | * Return: code point, or 0 for end of string, or -1 if no legal | |
88 | * code point is found. In case of an error src points to | |
89 | * the incorrect word. | |
90 | */ | |
91 | s32 utf16_get(const u16 **src); | |
92 | ||
93 | /** | |
94 | * utf16_put() - write UTF-16 code point to buffer | |
95 | * | |
96 | * @code: code point | |
97 | * @dst: pointer to destination buffer, updated to next position | |
98 | * Return: -1 if the input parameters are invalid | |
99 | */ | |
100 | int utf16_put(s32 code, u16 **dst); | |
101 | ||
102 | /** | |
103 | * utf16_strnlen() - length of a truncated utf-16 string | |
104 | * | |
105 | * @src: utf-16 string | |
106 | * @count: maximum number of code points to convert | |
107 | * Return: length in code points. If an invalid UTF-16 sequence is | |
108 | * hit one position will be reserved for a replacement | |
109 | * character. | |
110 | */ | |
111 | size_t utf16_strnlen(const u16 *src, size_t count); | |
112 | ||
113 | /** | |
114 | * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion | |
115 | * to utf-8 | |
116 | * | |
117 | * @src: utf-16 string | |
118 | * @count: maximum number of code points to convert | |
119 | * Return: length in bytes after conversion to utf-8 without the | |
120 | * trailing \0. If an invalid UTF-16 sequence is hit one | |
121 | * byte will be reserved for a replacement character. | |
122 | */ | |
123 | size_t utf16_utf8_strnlen(const u16 *src, size_t count); | |
124 | ||
125 | /** | |
126 | * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 | |
127 | * | |
128 | * @src: utf-16 string | |
129 | * Return: length in bytes after conversion to utf-8 without the | |
130 | * trailing \0. Invalid UTF-16 is handled as in utf16_utf8_strnlen(). | |
131 | */ | |
132 | #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) | |
133 | ||
134 | /** | |
135 | * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string | |
136 | * | |
137 | * @dst: destination buffer | |
138 | * @src: source buffer | |
139 | * @count: maximum number of code points to copy | |
140 | * Return: -1 if the input parameters are invalid | |
141 | */ | |
142 | int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); | |
143 | ||
144 | /** | |
145 | * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string | |
146 | * | |
147 | * @dst: destination buffer | |
148 | * @src: source buffer | |
149 | * Return: -1 if the input parameters are invalid | |
150 | */ | |
151 | #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) | |
152 | ||
b5130a81 HS |
153 | /** |
154 | * utf_to_lower() - convert a Unicode letter to lower case | |
155 | * | |
156 | * @code: letter to convert | |
157 | * Return: lower case letter or unchanged letter | |
158 | */ | |
159 | s32 utf_to_lower(const s32 code); | |
160 | ||
161 | /** | |
162 | * utf_to_upper() - convert a Unicode letter to upper case | |
163 | * | |
164 | * @code: letter to convert | |
165 | * Return: upper case letter or unchanged letter | |
166 | */ | |
167 | s32 utf_to_upper(const s32 code); | |
168 | ||
78178bb0 | 169 | /** |
1dde0d57 | 170 | * u16_strlen() - count non-zero words
78178bb0 | 171 | * |
1dde0d57 HS |
172 | * This function matches wcslen() if the -fshort-wchar compiler flag is set. |
173 | * In the EFI context we explicitly need a function handling u16 strings. | |
78178bb0 | 174 | * |
1dde0d57 HS |
175 | * @in: null terminated u16 string |
176 | * Return: number of non-zero words. |
177 | * This is not the number of utf-16 letters! | |
78178bb0 | 178 | */ |
1dde0d57 | 179 | size_t u16_strlen(const u16 *in); |
78178bb0 RC |
180 | |
181 | /** | |
1dde0d57 | 182 | * u16_strnlen() - count non-zero words up to a maximum
78178bb0 | 183 | * |
1dde0d57 HS |
184 | * This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set. |
185 | * In the EFI context we explicitly need a function handling u16 strings. | |
78178bb0 | 186 | * |
1dde0d57 HS |
187 | * @in: null terminated u16 string |
188 | * @count: maximum number of words to count | |
189 | * Return: number of non-zero words. |
190 | * This is not the number of utf-16 letters! | |
78178bb0 | 191 | */ |
1dde0d57 | 192 | size_t u16_strnlen(const u16 *in, size_t count); |
78178bb0 | 193 | |
78178bb0 RC |
194 | /** |
195 | * utf16_to_utf8() - Convert an utf16 string to utf8 | |
196 | * | |
197 | * Converts 'size' characters of the utf16 string 'src' to utf8 | |
198 | * written to the 'dest' buffer. | |
199 | * | |
984f251f | 200 | * NOTE that a single utf16 character can generate up to 3 utf8 |
78178bb0 RC |
201 | * bytes. See MAX_UTF8_PER_UTF16. |
202 | * | |
203 | * @dest: the destination buffer to write the utf8 characters | |
204 | * @src: the source utf16 string | |
205 | * @size: the number of utf16 characters to convert | |
206 | * Return: the pointer to the first unwritten byte in 'dest' | |
207 | */ | |
208 | uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); | |
209 | ||
210 | #endif /* __CHARSET_H_ */ |