]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
02a36bc9 | 2 | |
b5efdb8a | 3 | #include "alloc-util.h" |
cf0fbc49 | 4 | #include "string-util.h" |
80b0a597 | 5 | #include "strv.h" |
02a36bc9 DR |
6 | #include "utf8.h" |
7 | #include "util.h" | |
8 | ||
7991ac34 DR |
/* Exercises utf8_is_printable() with explicit byte lengths. */
static void test_utf8_is_printable(void) {
        /* 22 bytes of plain ASCII with an embedded tab. */
        static const char ascii_with_tab[] = "ascii is valid\tunicode";
        /* E2 84 A2: the complete three-byte encoding of U+2122. */
        static const char trademark[] = "\342\204\242";
        /* E1 84: a three-byte sequence cut off after its second byte. */
        static const char truncated[] = "\341\204";
        /* Non-ASCII literal; the test passes 4 as its byte length. */
        static const char accented[] = "ąę";

        assert_se(utf8_is_printable(ascii_with_tab, 22));
        assert_se(utf8_is_printable(trademark, 3));
        assert_se(!utf8_is_printable(truncated, 2));
        assert_se(utf8_is_printable(accented, 4));
}
15 | ||
02a36bc9 DR |
/* Exercises utf8_is_valid() on NUL-terminated strings. */
static void test_utf8_is_valid(void) {
        const char *ascii = "ascii is valid unicode";
        /* E2 84 A2: the complete three-byte encoding of U+2122. */
        const char *trademark = "\342\204\242";
        /* E1 84: lead byte of a three-byte sequence with only one continuation byte. */
        const char *truncated = "\341\204";

        assert_se(utf8_is_valid(ascii));
        assert_se(utf8_is_valid(trademark));
        assert_se(!utf8_is_valid(truncated));
}
21 | ||
/* Exercises ascii_is_valid() on NUL-terminated strings. */
static void test_ascii_is_valid(void) {
        /* Seven-bit text with tab, vertical tab and newline mixed in. */
        const char *ascii_with_controls = "alsdjf\t\vbarr\nba z";
        /* Both inputs below contain bytes with the high bit set. */
        const char *trademark = "\342\204\242";
        const char *truncated = "\341\204";

        assert_se(ascii_is_valid(ascii_with_controls));
        assert_se(!ascii_is_valid(trademark));
        assert_se(!ascii_is_valid(truncated));
}
27 | ||
294a3121 ZJS |
/* Exercises ascii_is_valid_n(), the length-bounded variant of the ASCII check. */
static void test_ascii_is_valid_n(void) {
        /* 17 characters followed by the implicit terminating NUL. */
        static const char ascii_with_controls[] = "alsdjf\t\vbarr\nba z";
        /* Three bytes, all with the high bit set. */
        static const char trademark[] = "\342\204\242";

        /* The whole string and a proper prefix of it are accepted ... */
        assert_se(ascii_is_valid_n(ascii_with_controls, 17));
        assert_se(ascii_is_valid_n(ascii_with_controls, 16));
        /* ... but a length of 18 also covers the terminating NUL and is rejected. */
        assert_se(!ascii_is_valid_n(ascii_with_controls, 18));

        /* Any non-empty prefix of the non-ASCII input is rejected; the empty prefix is accepted. */
        assert_se(!ascii_is_valid_n(trademark, 3));
        assert_se(!ascii_is_valid_n(trademark, 2));
        assert_se(!ascii_is_valid_n(trademark, 1));
        assert_se(ascii_is_valid_n(trademark, 0));
}
37 | ||
e7363c59 DR |
/*
 * Exercises utf8_encoded_valid_unichar(): for a well-formed leading character
 * the asserted result equals the number of bytes in its encoding, and for a
 * truncated one the result is negative.
 */
static void test_utf8_encoded_valid_unichar(void) {
        const char *three_bytes = "\342\204\242";     /* E2 84 A2, U+2122 */
        const char *two_bytes = "\302\256";           /* C2 AE, U+00AE */
        const char *one_byte = "a";
        const char *truncated = "\341\204";           /* E1 84, one continuation byte missing */
        const char *truncated_twice = "\341\204\341\204";

        assert_se(utf8_encoded_valid_unichar(three_bytes) == 3);
        assert_se(utf8_encoded_valid_unichar(two_bytes) == 2);
        assert_se(utf8_encoded_valid_unichar(one_byte) == 1);
        assert_se(utf8_encoded_valid_unichar(truncated) < 0);
        assert_se(utf8_encoded_valid_unichar(truncated_twice) < 0);
}
45 | ||
550a40ec ZJS |
46 | static void test_utf8_escaping(void) { |
47 | _cleanup_free_ char *p1, *p2, *p3; | |
48 | ||
49 | p1 = utf8_escape_invalid("goo goo goo"); | |
50 | puts(p1); | |
51 | assert_se(utf8_is_valid(p1)); | |
52 | ||
53 | p2 = utf8_escape_invalid("\341\204\341\204"); | |
54 | puts(p2); | |
55 | assert_se(utf8_is_valid(p2)); | |
56 | ||
57 | p3 = utf8_escape_invalid("\341\204"); | |
58 | puts(p3); | |
59 | assert_se(utf8_is_valid(p3)); | |
60 | } | |
61 | ||
fec84576 | 62 | static void test_utf8_escaping_printable(void) { |
3c6d3052 | 63 | _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6; |
fec84576 WC |
64 | |
65 | p1 = utf8_escape_non_printable("goo goo goo"); | |
66 | puts(p1); | |
67 | assert_se(utf8_is_valid(p1)); | |
68 | ||
69 | p2 = utf8_escape_non_printable("\341\204\341\204"); | |
70 | puts(p2); | |
71 | assert_se(utf8_is_valid(p2)); | |
72 | ||
73 | p3 = utf8_escape_non_printable("\341\204"); | |
74 | puts(p3); | |
75 | assert_se(utf8_is_valid(p3)); | |
76 | ||
77 | p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a"); | |
78 | puts(p4); | |
79 | assert_se(utf8_is_valid(p4)); | |
80 | ||
81 | p5 = utf8_escape_non_printable("\001 \019\20\a"); | |
82 | puts(p5); | |
83 | assert_se(utf8_is_valid(p5)); | |
3c6d3052 LP |
84 | |
85 | p6 = utf8_escape_non_printable("\xef\xbf\x30\x13"); | |
86 | puts(p6); | |
87 | assert_se(utf8_is_valid(p6)); | |
fec84576 WC |
88 | } |
89 | ||
04166cb7 | 90 | static void test_utf16_to_utf8(void) { |
80b0a597 LP |
91 | const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) }; |
92 | static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 }; | |
93 | _cleanup_free_ char16_t *b = NULL; | |
94 | _cleanup_free_ char *a = NULL; | |
04166cb7 | 95 | |
80b0a597 LP |
96 | /* Convert UTF-16 to UTF-8, filtering embedded bad chars */ |
97 | a = utf16_to_utf8(utf16, sizeof(utf16)); | |
04166cb7 | 98 | assert_se(a); |
80b0a597 LP |
99 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); |
100 | ||
101 | /* Convert UTF-8 to UTF-16, and back */ | |
102 | b = utf8_to_utf16(utf8, sizeof(utf8)); | |
103 | assert_se(b); | |
04166cb7 TG |
104 | |
105 | free(a); | |
80b0a597 LP |
106 | a = utf16_to_utf8(b, char16_strlen(b) * 2); |
107 | assert_se(a); | |
108 | assert_se(strlen(a) == sizeof(utf8)); | |
109 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); | |
04166cb7 TG |
110 | } |
111 | ||
e2cbc803 ZJS |
/*
 * Table-driven check of utf8_n_codepoints(): each entry pairs an input string
 * with the exact value the function is expected to return. The last entry is
 * a lone lead byte, for which (size_t) -1 is expected.
 */
static void test_utf8_n_codepoints(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc", 3 },
                { "zażółcić gęślą jaźń", 19 },
                { "串", 1 },
                { "", 0 },
                { "…👊🔪💐…", 5 },
                { "\xF1", (size_t) -1 },
        };
        size_t i;

        for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                assert_se(utf8_n_codepoints(cases[i].text) == cases[i].expected);
}
120 | ||
7c6c2e07 ZJS |
/*
 * Table-driven check of utf8_console_width(): each entry pairs an input
 * string with the exact value the function is expected to return. The CJK
 * and emoji inputs are expected to report more columns than they have code
 * points; the last entry is a lone lead byte, for which (size_t) -1 is
 * expected.
 */
static void test_utf8_console_width(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc", 3 },
                { "zażółcić gęślą jaźń", 19 },
                { "串", 2 },
                { "", 0 },
                { "…👊🔪💐…", 8 },
                { "\xF1", (size_t) -1 },
        };
        size_t i;

        for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                assert_se(utf8_console_width(cases[i].text) == cases[i].expected);
}
129 | ||
80b0a597 LP |
130 | static void test_utf8_to_utf16(void) { |
131 | const char *p; | |
132 | ||
133 | FOREACH_STRING(p, | |
134 | "abc", | |
135 | "zażółcić gęślą jaźń", | |
136 | "串", | |
137 | "", | |
138 | "…👊🔪💐…") { | |
139 | ||
140 | _cleanup_free_ char16_t *a = NULL; | |
141 | _cleanup_free_ char *b = NULL; | |
142 | ||
143 | a = utf8_to_utf16(p, strlen(p)); | |
144 | assert_se(a); | |
145 | ||
146 | b = utf16_to_utf8(a, char16_strlen(a) * 2); | |
147 | assert_se(b); | |
148 | assert_se(streq(p, b)); | |
149 | } | |
150 | } | |
151 | ||
02a36bc9 DR |
/*
 * Test driver: runs every test case in a fixed order. The command-line
 * arguments are not used.
 */
int main(int argc, char *argv[]) {
        /* Validity and printability predicates. */
        test_utf8_is_valid();
        test_utf8_is_printable();
        test_ascii_is_valid();
        test_ascii_is_valid_n();
        test_utf8_encoded_valid_unichar();

        /* Escaping helpers (these print their results to stdout). */
        test_utf8_escaping();
        test_utf8_escaping_printable();

        /* Conversions and measurements. */
        test_utf16_to_utf8();
        test_utf8_n_codepoints();
        test_utf8_console_width();
        test_utf8_to_utf16();

        return 0;
}