1 // SPDX-License-Identifier: GPL-2.0+
3 * Unit tests for Unicode functions
5 * Copyright (c) 2018 Heinrich Schuchardt <xypron.glpk@gmx.de>
14 #include <test/test.h>
15 #include <test/suites.h>
18 /*
 * Register _name as a unit test in the unicode_test linker list, the list
 * that do_ut_unicode() enumerates with ll_entry_start()/ll_entry_count().
 */
19 #define UNICODE_TEST(_name) UNIT_TEST(_name, 0, unicode_test)
21 /* Constants c1-c4 (utf-16) and d1-d4 (utf-8) encode the same letters */
23 /* Six characters translating to one utf-8 byte each. */
24 static const u16 c1
[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
25 /* Eight characters, one of them (0xe1) translating to two utf-8 bytes */
26 static const u16 c2
[] = {0x6b, 0x61, 0x66, 0x62, 0xe1, 0x74, 0x75, 0x72, 0x00};
27 /* Three characters translating to three utf-8 bytes each */
28 static const u16 c3
[] = {0x6f5c, 0x6c34, 0x8266, 0x00};
29 /* Three letters translating to four utf-8 bytes each */
30 static const u16 c4
[] = {0xd801, 0xdc8d, 0xd801, 0xdc96, 0xd801, 0xdc87,
33 /* Illegal utf-16 strings */
/* i1: low surrogate 0xdc87 that is not preceded by a high surrogate */
34 static const u16 i1
[] = {0x69, 0x31, 0xdc87, 0x6c, 0x00};
/* i2: high surrogate 0xd801 followed by another high surrogate */
35 static const u16 i2
[] = {0x69, 0x32, 0xd801, 0xd801, 0x6c, 0x00};
/* i3: high surrogate 0xd801 directly followed by the end of the string */
36 static const u16 i3
[] = {0x69, 0x33, 0xd801, 0x00};
38 /* Six characters translating to one utf-16 word each. */
39 static const char d1
[] = {0x55, 0x2d, 0x42, 0x6f, 0x6f, 0x74, 0x00};
40 /* Eight characters translating to one utf-16 word each */
41 static const char d2
[] = {0x6b, 0x61, 0x66, 0x62, 0xc3, 0xa1, 0x74, 0x75,
43 /* Three characters translating to one utf-16 word each */
44 static const char d3
[] = {0xe6, 0xbd, 0x9c, 0xe6, 0xb0, 0xb4, 0xe8, 0x89,
46 /* Three letters translating to two utf-16 words each */
47 static const char d4
[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96,
48 0xf0, 0x90, 0x92, 0x87, 0x00};
50 /* Illegal utf-8 strings */
/* j1: continuation byte 0xa1 without a preceding lead byte */
51 static const char j1
[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00};
/* j2: two-byte lead 0xc3 followed by another lead byte, not a continuation */
52 static const char j2
[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00};
/* j3: four-byte sequence 0xf0 0x90 cut short by a new lead byte 0xf0 */
53 static const char j3
[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00};
/*
 * Check that u16_strlen() returns the number of u16 words in front of the
 * terminating zero for each of the utf-16 constants c1-c4.
 */
55 static int unicode_test_u16_strlen(struct unit_test_state
*uts
)
57 ut_asserteq(6, u16_strlen(c1
));
58 ut_asserteq(8, u16_strlen(c2
));
59 ut_asserteq(3, u16_strlen(c3
));
/* c4 stores three letters as surrogate pairs, i.e. six words */
60 ut_asserteq(6, u16_strlen(c4
));
63 UNICODE_TEST(unicode_test_u16_strlen
);
/*
 * Check that u16_strdup() hands back a copy of c4: the returned pointer must
 * differ from c4 while the first sizeof(c4) bytes must be equal.
 */
65 static int unicode_test_u16_strdup(struct unit_test_state
*uts
)
67 u16
*copy
= u16_strdup(c4
);
/* A duplicate must live at a different address than its source */
69 ut_assert(copy
!= c4
);
/* sizeof(c4) covers the whole array, terminating zero included */
70 ut_assert(!memcmp(copy
, c4
, sizeof(c4
)));
74 UNICODE_TEST(unicode_test_u16_strdup
);
/*
 * Check that u16_strcpy() copies c1 into the destination buffer copy.
 */
76 static int unicode_test_u16_strcpy(struct unit_test_state
*uts
)
81 r
= u16_strcpy(copy
, c1
);
/* Compare the whole of c1, terminating zero included */
83 ut_assert(!memcmp(copy
, c1
, sizeof(c1
)));
86 UNICODE_TEST(unicode_test_u16_strcpy
);
88 /* U-Boot uses UTF-16 strings in the EFI context only. */
89 #if CONFIG_IS_ENABLED(EFI_LOADER) && !defined(API_BUILD)
/*
 * Check the output of sprintf() for utf-16 strings printed with the %ls
 * conversion, with field width, precision and left justification, and for
 * illegal utf-16 input.
 *
 * buf is filled with 0xff before every sprintf() call so that bytes left
 * over from an earlier call cannot satisfy a later check.
 */
90 static int unicode_test_string16(struct unit_test_state
*uts
)
94 /* Test length and precision */
95 memset(buf
, 0xff, sizeof(buf
));
96 sprintf(buf
, "%8.6ls", c2
);
/* buf[1] must be padding; the next 7 bytes must equal the start of d2 */
97 ut_asserteq(' ', buf
[1]);
98 ut_assert(!strncmp(&buf
[2], d2
, 7));
/* Same format with c4: buf[4] is padding, then the 12 bytes of d4 follow */
101 memset(buf
, 0xff, sizeof(buf
));
102 sprintf(buf
, "%8.6ls", c4
);
103 ut_asserteq(' ', buf
[4]);
104 ut_assert(!strncmp(&buf
[5], d4
, 12));
/* Left justified: the first 8 bytes of d4 come first, buf[8] is padding */
107 memset(buf
, 0xff, sizeof(buf
));
108 sprintf(buf
, "%-8.2ls", c4
);
109 ut_asserteq(' ', buf
[8]);
110 ut_assert(!strncmp(buf
, d4
, 8));
113 /*
 * Test handling of illegal utf-16 sequences: the expected output contains
 * a single '?' where the illegal words were.
 */
114 memset(buf
, 0xff, sizeof(buf
));
115 sprintf(buf
, "%ls", i1
);
116 ut_asserteq_str("i1?l", buf
);
118 memset(buf
, 0xff, sizeof(buf
));
119 sprintf(buf
, "%ls", i2
);
120 ut_asserteq_str("i2?l", buf
);
122 memset(buf
, 0xff, sizeof(buf
));
123 sprintf(buf
, "%ls", i3
);
124 ut_asserteq_str("i3?", buf
);
128 UNICODE_TEST(unicode_test_string16
);
/*
 * Check utf8_get(): every call receives the address of the string pointer s
 * and returns one code point. After each group of calls the position of s is
 * compared with the expected offset into the utf-8 test string.
 */
131 static int unicode_test_utf8_get(struct unit_test_state
*uts
)
137 /* Check characters less than 0x800 */
139 for (i
= 0; i
< 8; ++i
) {
140 code
= utf8_get((const char **)&s
);
141 /*
 * d2 is the utf-8 encoding of the text held as utf-16 in c2. c2 contains
 * no surrogates, so each of its words is the code point itself.
 */
142 ut_asserteq(c2
[i
], code
);
/*
 * NOTE(review): the statement below has no terminating semicolon, unlike
 * the other ut_* calls in this file. Presumably it only builds because of
 * the way the macro expands - confirm and add the ';'.
 */
146 ut_asserteq_ptr(s
, d2
+ 9)
148 /* Check characters less than 0x10000 */
150 for (i
= 0; i
< 4; ++i
) {
151 code
= utf8_get((const char **)&s
);
152 /* d3 is the utf-8 encoding of the text held as utf-16 in c3 */
153 ut_asserteq(c3
[i
], code
);
/* NOTE(review): terminating semicolon missing here as well - see above */
157 ut_asserteq_ptr(s
, d3
+ 9)
159 /* Check character greater than 0xffff */
161 code
= utf8_get((const char **)&s
);
162 ut_asserteq(0x0001048d, code
);
/* The first letter of d4 is four bytes long */
163 ut_asserteq_ptr(s
, d4
+ 4);
167 UNICODE_TEST(unicode_test_utf8_get
);
/*
 * Check utf8_put(): code points needing one to four utf-8 bytes are written
 * via the pointer pos. For each of them the return value, the distance
 * pos - buffer and the bytes found in buffer are verified.
 */
169 static int unicode_test_utf8_put(struct unit_test_state
*uts
)
171 char buffer
[8] = { 0, };
174 /* Commercial at, translates to one character */
/*
 * NOTE(review): the statement below has no terminating semicolon, unlike
 * the other ut_* calls in this file. Presumably it only builds because of
 * the way the macro expands - confirm and add the ';'.
 */
176 ut_assert(!utf8_put('@', &pos
))
177 ut_asserteq(1, pos
- buffer
);
178 ut_asserteq('@', buffer
[0]);
179 ut_assert(!buffer
[1]);
181 /* Latin letter G with acute, translates to two characters */
183 ut_assert(!utf8_put(0x1f4, &pos
));
184 ut_asserteq(2, pos
- buffer
);
185 ut_asserteq_str("\xc7\xb4", buffer
);
187 /* Tagalog letter i, translates to three characters */
189 ut_assert(!utf8_put(0x1701, &pos
));
190 ut_asserteq(3, pos
- buffer
);
191 ut_asserteq_str("\xe1\x9c\x81", buffer
);
193 /* Hamster face, translates to four characters */
195 ut_assert(!utf8_put(0x1f439, &pos
));
196 ut_asserteq(4, pos
- buffer
);
197 ut_asserteq_str("\xf0\x9f\x90\xb9", buffer
);
/* 0xd888 is a surrogate value, not a character: -1 is expected */
201 ut_asserteq(-1, utf8_put(0xd888, &pos
));
205 UNICODE_TEST(unicode_test_utf8_put
);
/*
 * Check that utf8_utf16_strlen() returns, for a utf-8 string, the length in
 * u16 words of the corresponding utf-16 string (compare c1-c4).
 */
207 static int unicode_test_utf8_utf16_strlen(struct unit_test_state
*uts
)
209 ut_asserteq(6, utf8_utf16_strlen(d1
));
210 ut_asserteq(8, utf8_utf16_strlen(d2
));
211 ut_asserteq(3, utf8_utf16_strlen(d3
));
/* Each of the three letters in d4 needs two utf-16 words */
212 ut_asserteq(6, utf8_utf16_strlen(d4
));
214 /* illegal utf-8 sequences */
215 ut_asserteq(4, utf8_utf16_strlen(j1
));
216 ut_asserteq(4, utf8_utf16_strlen(j2
));
217 ut_asserteq(3, utf8_utf16_strlen(j3
));
221 UNICODE_TEST(unicode_test_utf8_utf16_strlen
);
/*
 * Check utf8_utf16_strnlen(). The expected values show that the second
 * argument limits the number of characters read while the result is counted
 * in utf-16 words: two letters of d4 give four words.
 */
223 static int unicode_test_utf8_utf16_strnlen(struct unit_test_state
*uts
)
225 ut_asserteq(3, utf8_utf16_strnlen(d1
, 3));
/* A limit beyond the end of the string yields the full length */
226 ut_asserteq(6, utf8_utf16_strnlen(d1
, 13));
227 ut_asserteq(6, utf8_utf16_strnlen(d2
, 6));
228 ut_asserteq(2, utf8_utf16_strnlen(d3
, 2));
229 ut_asserteq(4, utf8_utf16_strnlen(d4
, 2));
230 ut_asserteq(6, utf8_utf16_strnlen(d4
, 3));
232 /* illegal utf-8 sequences */
233 ut_asserteq(4, utf8_utf16_strnlen(j1
, 16));
234 ut_asserteq(4, utf8_utf16_strnlen(j2
, 16));
235 ut_asserteq(3, utf8_utf16_strnlen(j3
, 16));
239 UNICODE_TEST(unicode_test_utf8_utf16_strnlen
);
242 * unicode_test_u16_strcmp() - Compare two u16 strings.
246 * @count: number of u16 to compare
247 * Return: -1 if a1 < a2, 0 if a1 == a2, 1 if a1 > a2
249 static int unicode_test_u16_strcmp(const u16
*a1
, const u16
*a2
, size_t count
)
/*
 * Step through both strings together. The loop ends once both strings are
 * at a zero word or count words have been visited.
 */
251 for (; (*a1
|| *a2
) && count
; ++a1
, ++a2
, --count
) {
/*
 * Check utf8_utf16_strcpy(): the utf-8 strings d1-d4 are converted to utf-16
 * through the output pointer pos. For each string pos - buf must equal the
 * expected number of u16 words and buf must compare equal to the matching
 * utf-16 constant.
 *
 * NOTE(review): the L"..." literals below have type wchar_t[] but are passed
 * as const u16 *. This presumably relies on a build with a 16-bit wchar_t
 * (e.g. -fshort-wchar) - confirm.
 */
260 static int unicode_test_utf8_utf16_strcpy(struct unit_test_state
*uts
)
266 utf8_utf16_strcpy(&pos
, d1
);
267 ut_asserteq(6, pos
- buf
);
268 ut_assert(!unicode_test_u16_strcmp(buf
, c1
, SIZE_MAX
));
271 utf8_utf16_strcpy(&pos
, d2
);
272 ut_asserteq(8, pos
- buf
);
273 ut_assert(!unicode_test_u16_strcmp(buf
, c2
, SIZE_MAX
));
276 utf8_utf16_strcpy(&pos
, d3
);
277 ut_asserteq(3, pos
- buf
);
278 ut_assert(!unicode_test_u16_strcmp(buf
, c3
, SIZE_MAX
));
281 utf8_utf16_strcpy(&pos
, d4
);
282 ut_asserteq(6, pos
- buf
);
283 ut_assert(!unicode_test_u16_strcmp(buf
, c4
, SIZE_MAX
));
285 /*
 * Illegal utf-8 strings: the expected result has a single '?' in place of
 * the illegal bytes.
 */
287 utf8_utf16_strcpy(&pos
, j1
);
288 ut_asserteq(4, pos
- buf
);
289 ut_assert(!unicode_test_u16_strcmp(buf
, L
"j1?l", SIZE_MAX
));
292 utf8_utf16_strcpy(&pos
, j2
);
293 ut_asserteq(4, pos
- buf
);
294 ut_assert(!unicode_test_u16_strcmp(buf
, L
"j2?l", SIZE_MAX
));
297 utf8_utf16_strcpy(&pos
, j3
);
298 ut_asserteq(3, pos
- buf
);
299 ut_assert(!unicode_test_u16_strcmp(buf
, L
"j3?", SIZE_MAX
));
303 UNICODE_TEST(unicode_test_utf8_utf16_strcpy
);
/*
 * Check utf8_utf16_strncpy(): like the strcpy test but with a limit as third
 * argument. The expected values show that the limit counts characters while
 * pos - buf counts u16 words: two letters of d4 give four words.
 *
 * buf is zeroed before every call so that words written by an earlier call
 * cannot influence the comparison.
 */
305 static int unicode_test_utf8_utf16_strncpy(struct unit_test_state
*uts
)
311 memset(buf
, 0, sizeof(buf
));
312 utf8_utf16_strncpy(&pos
, d1
, 4);
313 ut_asserteq(4, pos
- buf
);
/* Only the four copied words are compared */
315 ut_assert(!unicode_test_u16_strcmp(buf
, c1
, 4));
318 memset(buf
, 0, sizeof(buf
));
319 utf8_utf16_strncpy(&pos
, d2
, 10);
320 ut_asserteq(8, pos
- buf
);
/* The limit exceeds the string length, so the whole of c2 is expected */
322 ut_assert(!unicode_test_u16_strcmp(buf
, c2
, SIZE_MAX
));
325 memset(buf
, 0, sizeof(buf
));
326 utf8_utf16_strncpy(&pos
, d3
, 2);
327 ut_asserteq(2, pos
- buf
);
329 ut_assert(!unicode_test_u16_strcmp(buf
, c3
, 2));
332 memset(buf
, 0, sizeof(buf
));
333 utf8_utf16_strncpy(&pos
, d4
, 2);
334 ut_asserteq(4, pos
- buf
);
336 ut_assert(!unicode_test_u16_strcmp(buf
, c4
, 4));
339 memset(buf
, 0, sizeof(buf
));
340 utf8_utf16_strncpy(&pos
, d4
, 10);
341 ut_asserteq(6, pos
- buf
);
343 ut_assert(!unicode_test_u16_strcmp(buf
, c4
, SIZE_MAX
));
347 UNICODE_TEST(unicode_test_utf8_utf16_strncpy
);
/*
 * Check utf16_get(): every call receives the address of the string pointer s
 * and returns one code point. Afterwards the position of s is compared with
 * the expected offset into the utf-16 test string.
 */
349 static int unicode_test_utf16_get(struct unit_test_state
*uts
)
355 /* Check characters less than 0x10000 */
357 for (i
= 0; i
< 9; ++i
) {
358 code
= utf16_get((const u16
**)&s
);
/* c2 contains no surrogates, so each word is the code point itself */
359 ut_asserteq(c2
[i
], code
);
/* s is expected at the terminating zero of c2 */
363 ut_asserteq_ptr(c2
+ 8, s
);
365 /* Check character greater than 0xffff */
367 code
= utf16_get((const u16
**)&s
);
368 ut_asserteq(0x0001048d, code
);
/* The first letter of c4 is a surrogate pair, i.e. two words long */
369 ut_asserteq_ptr(c4
+ 2, s
);
373 UNICODE_TEST(unicode_test_utf16_get
);
/*
 * Check utf16_put(): code points needing one and two utf-16 words are
 * written via the pointer pos. The return value, the distance pos - buffer
 * and the words found in buffer are verified.
 */
375 static int unicode_test_utf16_put(struct unit_test_state
*uts
)
377 u16 buffer
[4] = { 0, };
380 /* Commercial at, translates to one word */
382 ut_assert(!utf16_put('@', &pos
));
383 ut_asserteq(1, pos
- buffer
);
384 ut_asserteq((u16
)'@', buffer
[0]);
385 ut_assert(!buffer
[1]);
387 /* Hamster face, translates to two words */
389 ut_assert(!utf16_put(0x1f439, &pos
));
390 ut_asserteq(2, pos
- buffer
);
/* 0xd83d 0xdc39 is the surrogate pair for code point 0x1f439 */
391 ut_asserteq((u16
)0xd83d, buffer
[0]);
392 ut_asserteq((u16
)0xdc39, buffer
[1]);
393 ut_assert(!buffer
[2]);
/* 0xd888 is a surrogate value, not a character: -1 is expected */
397 ut_asserteq(-1, utf16_put(0xd888, &pos
));
401 UNICODE_TEST(unicode_test_utf16_put
);
/*
 * Check utf16_strnlen(). The expected values show that both the limit and
 * the result count characters: a surrogate pair in c4 counts as one.
 */
403 static int unicode_test_utf16_strnlen(struct unit_test_state
*uts
)
405 ut_asserteq(3, utf16_strnlen(c1
, 3));
/* A limit beyond the end of the string yields the full length */
406 ut_asserteq(6, utf16_strnlen(c1
, 13));
407 ut_asserteq(6, utf16_strnlen(c2
, 6));
408 ut_asserteq(2, utf16_strnlen(c3
, 2));
409 ut_asserteq(2, utf16_strnlen(c4
, 2));
410 ut_asserteq(3, utf16_strnlen(c4
, 3));
412 /* illegal utf-16 word sequences */
413 ut_asserteq(4, utf16_strnlen(i1
, 16));
414 ut_asserteq(4, utf16_strnlen(i2
, 16));
415 ut_asserteq(3, utf16_strnlen(i3
, 16));
419 UNICODE_TEST(unicode_test_utf16_strnlen
);
/*
 * Check that utf16_utf8_strlen() returns, for a utf-16 string, the length in
 * bytes of the corresponding utf-8 string (compare d1-d4).
 */
421 static int unicode_test_utf16_utf8_strlen(struct unit_test_state
*uts
)
423 ut_asserteq(6, utf16_utf8_strlen(c1
));
/* Eight characters, one of which needs two bytes */
424 ut_asserteq(9, utf16_utf8_strlen(c2
));
/* Three characters of three bytes each */
425 ut_asserteq(9, utf16_utf8_strlen(c3
));
/* Three letters of four bytes each */
426 ut_asserteq(12, utf16_utf8_strlen(c4
));
428 /* illegal utf-16 word sequences */
429 ut_asserteq(4, utf16_utf8_strlen(i1
));
430 ut_asserteq(4, utf16_utf8_strlen(i2
));
431 ut_asserteq(3, utf16_utf8_strlen(i3
));
435 UNICODE_TEST(unicode_test_utf16_utf8_strlen
);
/*
 * Check utf16_utf8_strnlen(). The expected values show that the second
 * argument limits the number of characters read while the result is counted
 * in utf-8 bytes: two letters of c4 give eight bytes.
 */
437 static int unicode_test_utf16_utf8_strnlen(struct unit_test_state
*uts
)
439 ut_asserteq(3, utf16_utf8_strnlen(c1
, 3));
/* A limit beyond the end of the string yields the full length */
440 ut_asserteq(6, utf16_utf8_strnlen(c1
, 13));
/* The first six characters of c2 include the one needing two bytes */
441 ut_asserteq(7, utf16_utf8_strnlen(c2
, 6));
442 ut_asserteq(6, utf16_utf8_strnlen(c3
, 2));
443 ut_asserteq(8, utf16_utf8_strnlen(c4
, 2));
444 ut_asserteq(12, utf16_utf8_strnlen(c4
, 3));
447 UNICODE_TEST(unicode_test_utf16_utf8_strnlen
);
/*
 * Check utf16_utf8_strcpy(): the utf-16 strings c1-c4 are converted to utf-8
 * through the output pointer pos. For each string pos - buf must equal the
 * expected number of bytes and buf must equal the matching utf-8 constant.
 */
449 static int unicode_test_utf16_utf8_strcpy(struct unit_test_state
*uts
)
455 utf16_utf8_strcpy(&pos
, c1
);
456 ut_asserteq(6, pos
- buf
);
457 ut_asserteq_str(d1
, buf
);
460 utf16_utf8_strcpy(&pos
, c2
);
461 ut_asserteq(9, pos
- buf
);
462 ut_asserteq_str(d2
, buf
);
465 utf16_utf8_strcpy(&pos
, c3
);
466 ut_asserteq(9, pos
- buf
);
467 ut_asserteq_str(d3
, buf
);
470 utf16_utf8_strcpy(&pos
, c4
);
471 ut_asserteq(12, pos
- buf
);
472 ut_asserteq_str(d4
, buf
);
474 /*
 * Illegal utf-16 strings: the expected result has a single '?' in place of
 * the illegal words.
 */
476 utf16_utf8_strcpy(&pos
, i1
);
477 ut_asserteq(4, pos
- buf
);
478 ut_asserteq_str("i1?l", buf
);
481 utf16_utf8_strcpy(&pos
, i2
);
482 ut_asserteq(4, pos
- buf
);
483 ut_asserteq_str("i2?l", buf
);
486 utf16_utf8_strcpy(&pos
, i3
);
487 ut_asserteq(3, pos
- buf
);
488 ut_asserteq_str("i3?", buf
);
492 UNICODE_TEST(unicode_test_utf16_utf8_strcpy
);
/*
 * Check utf16_utf8_strncpy(): like the strcpy test but with a limit as third
 * argument. The expected values show that the limit counts characters while
 * pos - buf counts bytes: two letters of c4 give eight bytes.
 *
 * buf is zeroed before every call so that bytes written by an earlier call
 * cannot influence the comparison.
 */
494 static int unicode_test_utf16_utf8_strncpy(struct unit_test_state
*uts
)
500 memset(buf
, 0, sizeof(buf
));
501 utf16_utf8_strncpy(&pos
, c1
, 4);
502 ut_asserteq(4, pos
- buf
);
/* Only the four copied bytes are compared */
504 ut_assert(!strncmp(buf
, d1
, 4));
507 memset(buf
, 0, sizeof(buf
));
508 utf16_utf8_strncpy(&pos
, c2
, 10);
509 ut_asserteq(9, pos
- buf
);
/* The limit exceeds the string length, so the whole of d2 is expected */
511 ut_assert(!strncmp(buf
, d2
, SIZE_MAX
));
514 memset(buf
, 0, sizeof(buf
));
515 utf16_utf8_strncpy(&pos
, c3
, 2);
516 ut_asserteq(6, pos
- buf
);
518 ut_assert(!strncmp(buf
, d3
, 6));
521 memset(buf
, 0, sizeof(buf
));
522 utf16_utf8_strncpy(&pos
, c4
, 2);
523 ut_asserteq(8, pos
- buf
);
525 ut_assert(!strncmp(buf
, d4
, 8));
528 memset(buf
, 0, sizeof(buf
));
529 utf16_utf8_strncpy(&pos
, c4
, 10);
530 ut_asserteq(12, pos
- buf
);
532 ut_assert(!strncmp(buf
, d4
, SIZE_MAX
));
536 UNICODE_TEST(unicode_test_utf16_utf8_strncpy
);
/*
 * Check utf_to_lower() for ASCII letters, for the characters directly next
 * to the range 'A'-'Z', and for non-ASCII letters.
 */
538 static int unicode_test_utf_to_lower(struct unit_test_state
*uts
)
/* '@' directly precedes 'A' in ASCII and must stay unchanged */
540 ut_asserteq('@', utf_to_lower('@'));
541 ut_asserteq('a', utf_to_lower('A'));
542 ut_asserteq('z', utf_to_lower('Z'));
/* '[' directly follows 'Z' in ASCII and must stay unchanged */
543 ut_asserteq('[', utf_to_lower('['));
/* A letter that already is lower case must stay unchanged */
544 ut_asserteq('m', utf_to_lower('m'));
545 /* Latin letter O with diaeresis (umlaut) */
546 ut_asserteq(0x00f6, utf_to_lower(0x00d6));
547 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
548 /* Cyrillic letter I */
549 ut_asserteq(0x0438, utf_to_lower(0x0418));
553 UNICODE_TEST(unicode_test_utf_to_lower
);
/*
 * Check utf_to_upper() for ASCII letters, for the characters directly next
 * to the range 'a'-'z', and for non-ASCII letters.
 */
555 static int unicode_test_utf_to_upper(struct unit_test_state
*uts
)
/* '`' directly precedes 'a' in ASCII and must stay unchanged */
557 ut_asserteq('`', utf_to_upper('`'));
558 ut_asserteq('A', utf_to_upper('a'));
559 ut_asserteq('Z', utf_to_upper('z'));
/* '{' directly follows 'z' in ASCII and must stay unchanged */
560 ut_asserteq('{', utf_to_upper('{'));
/* A letter that already is upper case must stay unchanged */
561 ut_asserteq('M', utf_to_upper('M'));
562 /* Latin letter O with diaeresis (umlaut) */
563 ut_asserteq(0x00d6, utf_to_upper(0x00f6));
564 #ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
565 /* Cyrillic letter I */
566 ut_asserteq(0x0418, utf_to_upper(0x0438));
570 UNICODE_TEST(unicode_test_utf_to_upper
);
/*
 * Check the sign of the result of u16_strncmp() and u16_strcmp() for equal
 * strings, for strings differing after a common prefix and for strings
 * differing in the first character.
 *
 * NOTE(review): the L"..." literals have type wchar_t[]. Passing them to the
 * u16 string functions presumably relies on a build with a 16-bit wchar_t
 * (e.g. -fshort-wchar) - confirm.
 */
572 static int unicode_test_u16_strncmp(struct unit_test_state
*uts
)
574 ut_assert(u16_strncmp(L
"abc", L
"abc", 3) == 0);
/* The strings differ only after the first three characters */
575 ut_assert(u16_strncmp(L
"abcdef", L
"abcghi", 3) == 0);
576 ut_assert(u16_strncmp(L
"abcdef", L
"abcghi", 6) < 0);
577 ut_assert(u16_strncmp(L
"abcghi", L
"abcdef", 6) > 0);
578 ut_assert(u16_strcmp(L
"abc", L
"abc") == 0);
/* The first character decides although the first string is longer */
579 ut_assert(u16_strcmp(L
"abcdef", L
"deghi") < 0);
580 ut_assert(u16_strcmp(L
"deghi", L
"abcdef") > 0);
583 UNICODE_TEST(unicode_test_u16_strncmp
);
/*
 * Check u16_strsize(). The expected values are the sizes of the strings in
 * bytes with the terminating zero word included, e.g. 14 for c1 with its six
 * characters.
 *
 * NOTE(review): the assertions below pass (actual, expected) whereas the
 * rest of this file passes the expected value first - confirm which order
 * ut_asserteq_64() wants.
 */
585 static int unicode_test_u16_strsize(struct unit_test_state
*uts
)
587 ut_asserteq_64(u16_strsize(c1
), 14);
588 ut_asserteq_64(u16_strsize(c2
), 18);
589 ut_asserteq_64(u16_strsize(c3
), 8);
590 ut_asserteq_64(u16_strsize(c4
), 14);
593 UNICODE_TEST(unicode_test_u16_strsize
);
/*
 * Entry point of the Unicode test suite: looks up the tests registered in
 * the unicode_test linker list and passes them, together with argc and
 * argv, to cmd_ut_category(), whose result is returned.
 */
595 int do_ut_unicode(struct cmd_tbl
*cmdtp
, int flag
, int argc
, char *const argv
[])
/* Start and number of the entries that UNICODE_TEST() has put in the list */
597 struct unit_test
*tests
= ll_entry_start(struct unit_test
, unicode_test
);
598 const int n_ents
= ll_entry_count(struct unit_test
, unicode_test
);
/* "unicode_test_" is the name prefix shared by all tests in this file */
600 return cmd_ut_category("Unicode", "unicode_test_",
601 tests
, n_ents
, argc
, argv
);