]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
02a36bc9 | 2 | |
b5efdb8a | 3 | #include "alloc-util.h" |
cf0fbc49 | 4 | #include "string-util.h" |
80b0a597 | 5 | #include "strv.h" |
02a36bc9 DR |
6 | #include "utf8.h" |
7 | #include "util.h" | |
8 | ||
/* Runs utf8_is_printable() over a fixed table of (bytes, length) pairs and
 * checks each verdict against the expected one. */
static void test_utf8_is_printable(void) {
        static const struct {
                const char *bytes;
                size_t length;
                int printable;          /* 1 if the call must succeed, 0 if it must fail */
        } cases[] = {
                { "ascii is valid\tunicode", 22, 1 },
                { "\342\204\242",             3, 1 },
                { "\341\204",                 2, 0 },
                { "ąę",                       4, 1 },
                { "\r",                       1, 0 },
                { "\n",                       1, 1 },
                { "\t",                       1, 1 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(!!utf8_is_printable(cases[k].bytes, cases[k].length) == cases[k].printable);
}
20 | ||
/* Checks utf8_is_valid() on two strings it must accept and one it must reject. */
static void test_utf8_is_valid(void) {
        static const struct {
                const char *text;
                int valid;              /* 1 if the string must be accepted, 0 otherwise */
        } cases[] = {
                { "ascii is valid unicode", 1 },
                { "\342\204\242",           1 },
                { "\341\204",               0 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(!!utf8_is_valid(cases[k].text) == cases[k].valid);
}
28 | ||
/* Checks ascii_is_valid(): a string of 7-bit characters (including control
 * characters) must pass, strings with high-bit bytes must not. */
static void test_ascii_is_valid(void) {
        static const struct {
                const char *text;
                int valid;              /* 1 if the string must be accepted, 0 otherwise */
        } cases[] = {
                { "alsdjf\t\vbarr\nba z", 1 },
                { "\342\204\242",         0 },
                { "\341\204",             0 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(!!ascii_is_valid(cases[k].text) == cases[k].valid);
}
36 | ||
/* Checks ascii_is_valid_n() with explicit lengths, including a length that
 * reaches the terminating NUL (18 for the 17-character string) and a length
 * of zero. */
static void test_ascii_is_valid_n(void) {
        static const struct {
                const char *text;
                size_t length;
                int valid;              /* 1 if the prefix must be accepted, 0 otherwise */
        } cases[] = {
                { "alsdjf\t\vbarr\nba z", 17, 1 },
                { "alsdjf\t\vbarr\nba z", 16, 1 },
                { "alsdjf\t\vbarr\nba z", 18, 0 },
                { "\342\204\242",          3, 0 },
                { "\342\204\242",          2, 0 },
                { "\342\204\242",          1, 0 },
                { "\342\204\242",          0, 1 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(!!ascii_is_valid_n(cases[k].text, cases[k].length) == cases[k].valid);
}
48 | ||
/* Checks the return value of utf8_encoded_valid_unichar() for a table of
 * inputs: a positive count for the accepted cases, -EINVAL when the length
 * limit cuts the sequence short or the bytes do not form a sequence. */
static void test_utf8_encoded_valid_unichar(void) {
        static const struct {
                const char *bytes;
                size_t limit;
                int expected;
        } cases[] = {
                { "\342\204\242",      1,           -EINVAL }, /* truncated */
                { "\342\204\242",      2,           -EINVAL }, /* truncated */
                { "\342\204\242",      3,           3       },
                { "\342\204\242",      4,           3       },
                { "\302\256",          1,           -EINVAL }, /* truncated */
                { "\302\256",          2,           2       },
                { "\302\256",          3,           2       },
                { "\302\256",          (size_t) -1, 2       },
                { "a",                 1,           1       },
                { "a",                 2,           1       },
                { "\341\204",          1,           -EINVAL }, /* truncated, potentially valid */
                { "\341\204",          2,           -EINVAL }, /* truncated, potentially valid */
                { "\341\204",          3,           -EINVAL },
                { "\341\204\341\204",  4,           -EINVAL },
                { "\341\204\341\204",  5,           -EINVAL },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(utf8_encoded_valid_unichar(cases[k].bytes, cases[k].limit) == cases[k].expected);
}
68 | ||
9743846e | 69 | static void test_utf8_escape_invalid(void) { |
550a40ec ZJS |
70 | _cleanup_free_ char *p1, *p2, *p3; |
71 | ||
9743846e ZJS |
72 | log_info("/* %s */", __func__); |
73 | ||
550a40ec ZJS |
74 | p1 = utf8_escape_invalid("goo goo goo"); |
75 | puts(p1); | |
76 | assert_se(utf8_is_valid(p1)); | |
77 | ||
78 | p2 = utf8_escape_invalid("\341\204\341\204"); | |
79 | puts(p2); | |
80 | assert_se(utf8_is_valid(p2)); | |
81 | ||
82 | p3 = utf8_escape_invalid("\341\204"); | |
83 | puts(p3); | |
84 | assert_se(utf8_is_valid(p3)); | |
85 | } | |
86 | ||
9743846e | 87 | static void test_utf8_escape_non_printable(void) { |
3c6d3052 | 88 | _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6; |
fec84576 | 89 | |
9743846e ZJS |
90 | log_info("/* %s */", __func__); |
91 | ||
fec84576 WC |
92 | p1 = utf8_escape_non_printable("goo goo goo"); |
93 | puts(p1); | |
94 | assert_se(utf8_is_valid(p1)); | |
95 | ||
96 | p2 = utf8_escape_non_printable("\341\204\341\204"); | |
97 | puts(p2); | |
98 | assert_se(utf8_is_valid(p2)); | |
99 | ||
100 | p3 = utf8_escape_non_printable("\341\204"); | |
101 | puts(p3); | |
102 | assert_se(utf8_is_valid(p3)); | |
103 | ||
104 | p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a"); | |
105 | puts(p4); | |
106 | assert_se(utf8_is_valid(p4)); | |
107 | ||
108 | p5 = utf8_escape_non_printable("\001 \019\20\a"); | |
109 | puts(p5); | |
110 | assert_se(utf8_is_valid(p5)); | |
3c6d3052 LP |
111 | |
112 | p6 = utf8_escape_non_printable("\xef\xbf\x30\x13"); | |
113 | puts(p6); | |
114 | assert_se(utf8_is_valid(p6)); | |
fec84576 WC |
115 | } |
116 | ||
da88f542 ZJS |
117 | static void test_utf8_escape_non_printable_full(void) { |
118 | log_info("/* %s */", __func__); | |
119 | ||
120 | for (size_t i = 0; i < 20; i++) { | |
121 | _cleanup_free_ char *p; | |
122 | ||
123 | p = utf8_escape_non_printable_full("goo goo goo", i); | |
124 | puts(p); | |
125 | assert_se(utf8_is_valid(p)); | |
126 | assert_se(utf8_console_width(p) <= i); | |
127 | } | |
128 | ||
129 | for (size_t i = 0; i < 20; i++) { | |
130 | _cleanup_free_ char *p; | |
131 | ||
132 | p = utf8_escape_non_printable_full("\001 \019\20\a", i); | |
133 | puts(p); | |
134 | assert_se(utf8_is_valid(p)); | |
135 | assert_se(utf8_console_width(p) <= i); | |
136 | } | |
137 | ||
138 | for (size_t i = 0; i < 20; i++) { | |
139 | _cleanup_free_ char *p; | |
140 | ||
141 | p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i); | |
142 | puts(p); | |
143 | assert_se(utf8_is_valid(p)); | |
144 | assert_se(utf8_console_width(p) <= i); | |
145 | } | |
146 | } | |
147 | ||
04166cb7 | 148 | static void test_utf16_to_utf8(void) { |
80b0a597 LP |
149 | const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) }; |
150 | static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 }; | |
151 | _cleanup_free_ char16_t *b = NULL; | |
152 | _cleanup_free_ char *a = NULL; | |
04166cb7 | 153 | |
9743846e ZJS |
154 | log_info("/* %s */", __func__); |
155 | ||
80b0a597 LP |
156 | /* Convert UTF-16 to UTF-8, filtering embedded bad chars */ |
157 | a = utf16_to_utf8(utf16, sizeof(utf16)); | |
04166cb7 | 158 | assert_se(a); |
80b0a597 LP |
159 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); |
160 | ||
161 | /* Convert UTF-8 to UTF-16, and back */ | |
162 | b = utf8_to_utf16(utf8, sizeof(utf8)); | |
163 | assert_se(b); | |
04166cb7 TG |
164 | |
165 | free(a); | |
80b0a597 LP |
166 | a = utf16_to_utf8(b, char16_strlen(b) * 2); |
167 | assert_se(a); | |
168 | assert_se(strlen(a) == sizeof(utf8)); | |
169 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); | |
04166cb7 TG |
170 | } |
171 | ||
/* Checks the value returned by utf8_n_codepoints() for several strings;
 * the lone byte "\xF1" is expected to yield (size_t) -1. */
static void test_utf8_n_codepoints(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc",                 3           },
                { "zażółcić gęślą jaźń", 19          },
                { "串",                  1           },
                { "",                    0           },
                { "…👊🔪💐…",            5           },
                { "\xF1",                (size_t) -1 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(utf8_n_codepoints(cases[k].text) == cases[k].expected);
}
182 | ||
/* Checks the value returned by utf8_console_width() for several strings;
 * the lone byte "\xF1" is expected to yield (size_t) -1. */
static void test_utf8_console_width(void) {
        static const struct {
                const char *text;
                size_t expected;
        } cases[] = {
                { "abc",                 3           },
                { "zażółcić gęślą jaźń", 19          },
                { "串",                  2           },
                { "",                    0           },
                { "…👊🔪💐…",            8           },
                { "\xF1",                (size_t) -1 },
        };

        log_info("/* %s */", __func__);

        for (size_t k = 0; k < sizeof(cases) / sizeof(cases[0]); k++)
                assert_se(utf8_console_width(cases[k].text) == cases[k].expected);
}
193 | ||
80b0a597 LP |
194 | static void test_utf8_to_utf16(void) { |
195 | const char *p; | |
196 | ||
9743846e ZJS |
197 | log_info("/* %s */", __func__); |
198 | ||
80b0a597 LP |
199 | FOREACH_STRING(p, |
200 | "abc", | |
201 | "zażółcić gęślą jaźń", | |
202 | "串", | |
203 | "", | |
204 | "…👊🔪💐…") { | |
205 | ||
206 | _cleanup_free_ char16_t *a = NULL; | |
207 | _cleanup_free_ char *b = NULL; | |
208 | ||
209 | a = utf8_to_utf16(p, strlen(p)); | |
210 | assert_se(a); | |
211 | ||
212 | b = utf16_to_utf8(a, char16_strlen(a) * 2); | |
213 | assert_se(b); | |
214 | assert_se(streq(p, b)); | |
215 | } | |
216 | } | |
217 | ||
02a36bc9 DR |
218 | int main(int argc, char *argv[]) { |
219 | test_utf8_is_valid(); | |
7991ac34 | 220 | test_utf8_is_printable(); |
e7363c59 | 221 | test_ascii_is_valid(); |
294a3121 | 222 | test_ascii_is_valid_n(); |
e7363c59 | 223 | test_utf8_encoded_valid_unichar(); |
9743846e ZJS |
224 | test_utf8_escape_invalid(); |
225 | test_utf8_escape_non_printable(); | |
da88f542 | 226 | test_utf8_escape_non_printable_full(); |
04166cb7 | 227 | test_utf16_to_utf8(); |
e2cbc803 | 228 | test_utf8_n_codepoints(); |
7c6c2e07 | 229 | test_utf8_console_width(); |
80b0a597 | 230 | test_utf8_to_utf16(); |
7991ac34 DR |
231 | |
232 | return 0; | |
02a36bc9 | 233 | } |