]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
02a36bc9 | 2 | |
b5efdb8a | 3 | #include "alloc-util.h" |
cf0fbc49 | 4 | #include "string-util.h" |
80b0a597 | 5 | #include "strv.h" |
02a36bc9 DR |
6 | #include "utf8.h" |
7 | #include "util.h" | |
8 | ||
/* Exercises utf8_is_printable() with explicit byte counts: plain ASCII text, complete and
 * cut-off multi-byte sequences, and a few individual control characters. */
static void test_utf8_is_printable(void) {
        const char *ascii_with_tab = "ascii is valid\tunicode";
        const char *trademark = "\342\204\242";  /* U+2122, three bytes */
        const char *cut_off = "\341\204";        /* lead byte of a three-byte sequence plus one continuation byte */

        log_info("/* %s */", __func__);

        /* Text: complete sequences are expected to pass, the cut-off one is not. */
        assert_se(utf8_is_printable(ascii_with_tab, 22));
        assert_se(utf8_is_printable(trademark, 3));
        assert_se(!utf8_is_printable(cut_off, 2));
        assert_se(utf8_is_printable("ąę", 4));

        /* Control characters: carriage return is expected to be rejected, newline and tab accepted. */
        assert_se(!utf8_is_printable("\r", 1));
        assert_se(utf8_is_printable("\n", 1));
        assert_se(utf8_is_printable("\t", 1));
}
20 | ||
/* Exercises utf8_is_valid_n() with lengths shorter than, equal to, and one past the visible
 * characters of each input. */
static void test_utf8_n_is_valid(void) {
        static const char ascii[] = "ascii is valid unicode";  /* 22 characters + NUL */
        static const char trademark[] = "\342\204\242";        /* one three-byte sequence + NUL */
        static const char brackets[] = "<ZZ>";                 /* 4 characters + NUL */

        log_info("/* %s */", __func__);

        assert_se( utf8_is_valid_n(ascii, 21));
        assert_se( utf8_is_valid_n(ascii, 22));
        /* A length of 23 takes in the terminating NUL byte as well. */
        assert_se(!utf8_is_valid_n(ascii, 23));

        /* Only the empty prefix and the complete sequence are expected to validate. */
        assert_se( utf8_is_valid_n(trademark, 0));
        assert_se(!utf8_is_valid_n(trademark, 1));
        assert_se(!utf8_is_valid_n(trademark, 2));
        assert_se( utf8_is_valid_n(trademark, 3));
        assert_se(!utf8_is_valid_n(trademark, 4));

        /* Every prefix of the ASCII string validates; including the NUL does not. */
        for (size_t n = 0; n <= 4; n++)
                assert_se( utf8_is_valid_n(brackets, n));
        assert_se(!utf8_is_valid_n(brackets, 5));
}
39 | ||
/* Exercises utf8_is_valid() on NUL-terminated input: ASCII, one complete three-byte sequence,
 * and a three-byte sequence missing its last byte. */
static void test_utf8_is_valid(void) {
        const char *ascii = "ascii is valid unicode";
        const char *complete = "\342\204\242";
        const char *cut_off = "\341\204";

        log_info("/* %s */", __func__);

        assert_se(utf8_is_valid(ascii));
        assert_se(utf8_is_valid(complete));
        assert_se(!utf8_is_valid(cut_off));
}
47 | ||
/* Exercises ascii_is_valid(): text with tab, vertical tab and newline is expected to pass,
 * anything containing bytes with the high bit set is expected to fail. */
static void test_ascii_is_valid(void) {
        const char *with_whitespace = "alsdjf\t\vbarr\nba z";
        const char *multi_byte = "\342\204\242";
        const char *cut_off = "\341\204";

        log_info("/* %s */", __func__);

        assert_se( ascii_is_valid(with_whitespace));
        assert_se(!ascii_is_valid(multi_byte));
        assert_se(!ascii_is_valid(cut_off));
}
55 | ||
/* Exercises ascii_is_valid_n() with explicit lengths around the end of each input. */
static void test_ascii_is_valid_n(void) {
        static const char text[] = "alsdjf\t\vbarr\nba z";  /* 17 characters + NUL */
        static const char multi_byte[] = "\342\204\242";    /* three non-ASCII bytes + NUL */

        log_info("/* %s */", __func__);

        assert_se( ascii_is_valid_n(text, 17));
        assert_se( ascii_is_valid_n(text, 16));
        /* A length of 18 takes in the terminating NUL byte as well. */
        assert_se(!ascii_is_valid_n(text, 18));

        /* Any non-empty prefix contains a non-ASCII byte; only the empty prefix is expected to pass. */
        for (size_t n = 3; n >= 1; n--)
                assert_se(!ascii_is_valid_n(multi_byte, n));
        assert_se( ascii_is_valid_n(multi_byte, 0));
}
67 | ||
/* Exercises utf8_encoded_valid_unichar(): the expected result is the encoded length of the
 * first character when it fits in the given byte budget, and -EINVAL otherwise. */
static void test_utf8_encoded_valid_unichar(void) {
        const char *three_byte = "\342\204\242";          /* U+2122 */
        const char *two_byte = "\302\256";                /* U+00AE */
        const char *cut_off = "\341\204";                 /* three-byte sequence missing its last byte */
        const char *cut_off_twice = "\341\204\341\204";   /* the same cut-off sequence, repeated */

        log_info("/* %s */", __func__);

        assert_se(utf8_encoded_valid_unichar(three_byte, 1) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(three_byte, 2) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(three_byte, 3) == 3);
        assert_se(utf8_encoded_valid_unichar(three_byte, 4) == 3);

        assert_se(utf8_encoded_valid_unichar(two_byte, 1) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(two_byte, 2) == 2);
        assert_se(utf8_encoded_valid_unichar(two_byte, 3) == 2);
        assert_se(utf8_encoded_valid_unichar(two_byte, (size_t) -1) == 2);

        assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
        assert_se(utf8_encoded_valid_unichar("a", 2) == 1);

        assert_se(utf8_encoded_valid_unichar(cut_off, 1) == -EINVAL); /* truncated, potentially valid */
        assert_se(utf8_encoded_valid_unichar(cut_off, 2) == -EINVAL); /* truncated, potentially valid */
        assert_se(utf8_encoded_valid_unichar(cut_off, 3) == -EINVAL);

        assert_se(utf8_encoded_valid_unichar(cut_off_twice, 4) == -EINVAL);
        assert_se(utf8_encoded_valid_unichar(cut_off_twice, 5) == -EINVAL);
}
87 | ||
9743846e | 88 | static void test_utf8_escape_invalid(void) { |
550a40ec ZJS |
89 | _cleanup_free_ char *p1, *p2, *p3; |
90 | ||
9743846e ZJS |
91 | log_info("/* %s */", __func__); |
92 | ||
550a40ec ZJS |
93 | p1 = utf8_escape_invalid("goo goo goo"); |
94 | puts(p1); | |
95 | assert_se(utf8_is_valid(p1)); | |
96 | ||
97 | p2 = utf8_escape_invalid("\341\204\341\204"); | |
98 | puts(p2); | |
99 | assert_se(utf8_is_valid(p2)); | |
100 | ||
101 | p3 = utf8_escape_invalid("\341\204"); | |
102 | puts(p3); | |
103 | assert_se(utf8_is_valid(p3)); | |
104 | } | |
105 | ||
9743846e | 106 | static void test_utf8_escape_non_printable(void) { |
3c6d3052 | 107 | _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6; |
fec84576 | 108 | |
9743846e ZJS |
109 | log_info("/* %s */", __func__); |
110 | ||
fec84576 WC |
111 | p1 = utf8_escape_non_printable("goo goo goo"); |
112 | puts(p1); | |
113 | assert_se(utf8_is_valid(p1)); | |
114 | ||
115 | p2 = utf8_escape_non_printable("\341\204\341\204"); | |
116 | puts(p2); | |
117 | assert_se(utf8_is_valid(p2)); | |
118 | ||
119 | p3 = utf8_escape_non_printable("\341\204"); | |
120 | puts(p3); | |
121 | assert_se(utf8_is_valid(p3)); | |
122 | ||
123 | p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a"); | |
124 | puts(p4); | |
125 | assert_se(utf8_is_valid(p4)); | |
126 | ||
127 | p5 = utf8_escape_non_printable("\001 \019\20\a"); | |
128 | puts(p5); | |
129 | assert_se(utf8_is_valid(p5)); | |
3c6d3052 LP |
130 | |
131 | p6 = utf8_escape_non_printable("\xef\xbf\x30\x13"); | |
132 | puts(p6); | |
133 | assert_se(utf8_is_valid(p6)); | |
fec84576 WC |
134 | } |
135 | ||
da88f542 ZJS |
136 | static void test_utf8_escape_non_printable_full(void) { |
137 | log_info("/* %s */", __func__); | |
138 | ||
139 | for (size_t i = 0; i < 20; i++) { | |
140 | _cleanup_free_ char *p; | |
141 | ||
142 | p = utf8_escape_non_printable_full("goo goo goo", i); | |
143 | puts(p); | |
144 | assert_se(utf8_is_valid(p)); | |
145 | assert_se(utf8_console_width(p) <= i); | |
146 | } | |
147 | ||
148 | for (size_t i = 0; i < 20; i++) { | |
149 | _cleanup_free_ char *p; | |
150 | ||
151 | p = utf8_escape_non_printable_full("\001 \019\20\a", i); | |
152 | puts(p); | |
153 | assert_se(utf8_is_valid(p)); | |
154 | assert_se(utf8_console_width(p) <= i); | |
155 | } | |
156 | ||
157 | for (size_t i = 0; i < 20; i++) { | |
158 | _cleanup_free_ char *p; | |
159 | ||
160 | p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i); | |
161 | puts(p); | |
162 | assert_se(utf8_is_valid(p)); | |
163 | assert_se(utf8_console_width(p) <= i); | |
164 | } | |
165 | } | |
166 | ||
04166cb7 | 167 | static void test_utf16_to_utf8(void) { |
80b0a597 LP |
168 | const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) }; |
169 | static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 }; | |
170 | _cleanup_free_ char16_t *b = NULL; | |
171 | _cleanup_free_ char *a = NULL; | |
04166cb7 | 172 | |
9743846e ZJS |
173 | log_info("/* %s */", __func__); |
174 | ||
80b0a597 LP |
175 | /* Convert UTF-16 to UTF-8, filtering embedded bad chars */ |
176 | a = utf16_to_utf8(utf16, sizeof(utf16)); | |
04166cb7 | 177 | assert_se(a); |
80b0a597 LP |
178 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); |
179 | ||
180 | /* Convert UTF-8 to UTF-16, and back */ | |
181 | b = utf8_to_utf16(utf8, sizeof(utf8)); | |
182 | assert_se(b); | |
04166cb7 TG |
183 | |
184 | free(a); | |
80b0a597 LP |
185 | a = utf16_to_utf8(b, char16_strlen(b) * 2); |
186 | assert_se(a); | |
187 | assert_se(strlen(a) == sizeof(utf8)); | |
188 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); | |
04166cb7 TG |
189 | } |
190 | ||
/* Exercises utf8_n_codepoints() against a table of inputs and expected counts; the lone
 * 0xF1 byte is expected to yield (size_t) -1. */
static void test_utf8_n_codepoints(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc",                 3           },
                { "zażółcić gęślą jaźń", 19          },
                { "串",                  1           },
                { "",                    0           },
                { "…👊🔪💐…",            5           },
                { "\xF1",                (size_t) -1 },
        };

        log_info("/* %s */", __func__);

        for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                assert_se(utf8_n_codepoints(cases[i].text) == cases[i].expected);
}
201 | ||
/* Exercises utf8_console_width() against a table of inputs and expected widths; the lone
 * 0xF1 byte is expected to yield (size_t) -1. */
static void test_utf8_console_width(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc",                 3           },
                { "zażółcić gęślą jaźń", 19          },
                { "串",                  2           },
                { "",                    0           },
                { "…👊🔪💐…",            8           },
                { "\xF1",                (size_t) -1 },
        };

        log_info("/* %s */", __func__);

        for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                assert_se(utf8_console_width(cases[i].text) == cases[i].expected);
}
212 | ||
80b0a597 LP |
213 | static void test_utf8_to_utf16(void) { |
214 | const char *p; | |
215 | ||
9743846e ZJS |
216 | log_info("/* %s */", __func__); |
217 | ||
80b0a597 LP |
218 | FOREACH_STRING(p, |
219 | "abc", | |
220 | "zażółcić gęślą jaźń", | |
221 | "串", | |
222 | "", | |
223 | "…👊🔪💐…") { | |
224 | ||
225 | _cleanup_free_ char16_t *a = NULL; | |
226 | _cleanup_free_ char *b = NULL; | |
227 | ||
228 | a = utf8_to_utf16(p, strlen(p)); | |
229 | assert_se(a); | |
230 | ||
231 | b = utf16_to_utf8(a, char16_strlen(a) * 2); | |
232 | assert_se(b); | |
233 | assert_se(streq(p, b)); | |
234 | } | |
235 | } | |
236 | ||
02a36bc9 | 237 | int main(int argc, char *argv[]) { |
80ab31a4 | 238 | test_utf8_n_is_valid(); |
02a36bc9 | 239 | test_utf8_is_valid(); |
7991ac34 | 240 | test_utf8_is_printable(); |
e7363c59 | 241 | test_ascii_is_valid(); |
294a3121 | 242 | test_ascii_is_valid_n(); |
e7363c59 | 243 | test_utf8_encoded_valid_unichar(); |
9743846e ZJS |
244 | test_utf8_escape_invalid(); |
245 | test_utf8_escape_non_printable(); | |
da88f542 | 246 | test_utf8_escape_non_printable_full(); |
04166cb7 | 247 | test_utf16_to_utf8(); |
e2cbc803 | 248 | test_utf8_n_codepoints(); |
7c6c2e07 | 249 | test_utf8_console_width(); |
80b0a597 | 250 | test_utf8_to_utf16(); |
7991ac34 DR |
251 | |
252 | return 0; | |
02a36bc9 | 253 | } |