]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
02a36bc9 | 2 | |
b5efdb8a | 3 | #include "alloc-util.h" |
cf0fbc49 | 4 | #include "string-util.h" |
80b0a597 | 5 | #include "strv.h" |
02a36bc9 DR |
6 | #include "utf8.h" |
7 | #include "util.h" | |
8 | ||
/* Exercises utf8_is_printable() with explicit byte counts: ASCII text containing a tab,
 * two complete multi-byte strings, and a sequence that stops after two bytes. */
static void test_utf8_is_printable(void) {
        const char *ascii_with_tab = "ascii is valid\tunicode";  /* 22 bytes */
        const char *trade_mark = "\342\204\242";                 /* U+2122, 3 bytes */
        const char *cut_short = "\341\204";                      /* three-byte lead, one byte missing */
        const char *polish = "ąę";                               /* two 2-byte characters */

        log_info("/* %s */", __func__);

        assert_se(utf8_is_printable(ascii_with_tab, 22));
        assert_se(utf8_is_printable(trade_mark, 3));
        assert_se(!utf8_is_printable(cut_short, 2));
        assert_se(utf8_is_printable(polish, 4));
}
17 | ||
/* Checks utf8_is_valid() on plain ASCII, on a complete three-byte sequence, and on a
 * sequence that is missing its last byte. */
static void test_utf8_is_valid(void) {
        const char *plain_ascii = "ascii is valid unicode";
        const char *trade_mark = "\342\204\242";   /* U+2122 */
        const char *cut_short = "\341\204";        /* three-byte lead, one byte missing */

        log_info("/* %s */", __func__);

        assert_se(utf8_is_valid(plain_ascii));
        assert_se(utf8_is_valid(trade_mark));
        assert_se(!utf8_is_valid(cut_short));
}
25 | ||
/* Checks that ascii_is_valid() accepts text mixing letters with tab, vertical tab and
 * newline, and rejects two strings made up of non-ASCII bytes. */
static void test_ascii_is_valid(void) {
        const char *with_controls = "alsdjf\t\vbarr\nba z";
        const char *trade_mark = "\342\204\242";
        const char *cut_short = "\341\204";

        log_info("/* %s */", __func__);

        assert_se(ascii_is_valid(with_controls));
        assert_se(!ascii_is_valid(trade_mark));
        assert_se(!ascii_is_valid(cut_short));
}
33 | ||
/* Checks ascii_is_valid_n() with byte counts around the end of a 17-byte ASCII string
 * and with every prefix length of a three-byte non-ASCII sequence. */
static void test_ascii_is_valid_n(void) {
        const char *with_controls = "alsdjf\t\vbarr\nba z";  /* 17 bytes before the NUL */
        const char *trade_mark = "\342\204\242";

        log_info("/* %s */", __func__);

        /* Whole string and a shorter prefix are accepted ... */
        assert_se(ascii_is_valid_n(with_controls, 17));
        assert_se(ascii_is_valid_n(with_controls, 16));
        /* ... but a count that reaches the terminating NUL is not. */
        assert_se(!ascii_is_valid_n(with_controls, 18));

        /* Any non-empty prefix of the non-ASCII bytes is rejected; the empty prefix passes. */
        assert_se(!ascii_is_valid_n(trade_mark, 3));
        assert_se(!ascii_is_valid_n(trade_mark, 2));
        assert_se(!ascii_is_valid_n(trade_mark, 1));
        assert_se(ascii_is_valid_n(trade_mark, 0));
}
45 | ||
/* Checks utf8_encoded_valid_unichar() against various length limits: the expected result is
 * the encoded length of the first character, or -EINVAL when the sequence is cut off by the
 * limit or is incomplete in the data itself. */
static void test_utf8_encoded_valid_unichar(void) {
        const char *trade_mark = "\342\204\242";            /* U+2122, three bytes */
        const char *registered = "\302\256";                /* U+00AE, two bytes */
        const char *letter = "a";
        const char *cut_short = "\341\204";                 /* three-byte lead, one byte missing */
        const char *cut_short_twice = "\341\204\341\204";

        log_info("/* %s */", __func__);

        /* Three-byte character */
        assert_se(utf8_encoded_valid_unichar(trade_mark, 1) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(trade_mark, 2) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(trade_mark, 3) == 3);
        assert_se(utf8_encoded_valid_unichar(trade_mark, 4) == 3);

        /* Two-byte character, including an effectively unlimited length */
        assert_se(utf8_encoded_valid_unichar(registered, 1) == -EINVAL); /* truncated */
        assert_se(utf8_encoded_valid_unichar(registered, 2) == 2);
        assert_se(utf8_encoded_valid_unichar(registered, 3) == 2);
        assert_se(utf8_encoded_valid_unichar(registered, (size_t) -1) == 2);

        /* Single ASCII byte */
        assert_se(utf8_encoded_valid_unichar(letter, 1) == 1);
        assert_se(utf8_encoded_valid_unichar(letter, 2) == 1);

        /* Incomplete sequences */
        assert_se(utf8_encoded_valid_unichar(cut_short, 1) == -EINVAL); /* truncated, potentially valid */
        assert_se(utf8_encoded_valid_unichar(cut_short, 2) == -EINVAL); /* truncated, potentially valid */
        assert_se(utf8_encoded_valid_unichar(cut_short, 3) == -EINVAL);
        assert_se(utf8_encoded_valid_unichar(cut_short_twice, 4) == -EINVAL);
        assert_se(utf8_encoded_valid_unichar(cut_short_twice, 5) == -EINVAL);
}
65 | ||
9743846e | 66 | static void test_utf8_escape_invalid(void) { |
550a40ec ZJS |
67 | _cleanup_free_ char *p1, *p2, *p3; |
68 | ||
9743846e ZJS |
69 | log_info("/* %s */", __func__); |
70 | ||
550a40ec ZJS |
71 | p1 = utf8_escape_invalid("goo goo goo"); |
72 | puts(p1); | |
73 | assert_se(utf8_is_valid(p1)); | |
74 | ||
75 | p2 = utf8_escape_invalid("\341\204\341\204"); | |
76 | puts(p2); | |
77 | assert_se(utf8_is_valid(p2)); | |
78 | ||
79 | p3 = utf8_escape_invalid("\341\204"); | |
80 | puts(p3); | |
81 | assert_se(utf8_is_valid(p3)); | |
82 | } | |
83 | ||
9743846e | 84 | static void test_utf8_escape_non_printable(void) { |
3c6d3052 | 85 | _cleanup_free_ char *p1, *p2, *p3, *p4, *p5, *p6; |
fec84576 | 86 | |
9743846e ZJS |
87 | log_info("/* %s */", __func__); |
88 | ||
fec84576 WC |
89 | p1 = utf8_escape_non_printable("goo goo goo"); |
90 | puts(p1); | |
91 | assert_se(utf8_is_valid(p1)); | |
92 | ||
93 | p2 = utf8_escape_non_printable("\341\204\341\204"); | |
94 | puts(p2); | |
95 | assert_se(utf8_is_valid(p2)); | |
96 | ||
97 | p3 = utf8_escape_non_printable("\341\204"); | |
98 | puts(p3); | |
99 | assert_se(utf8_is_valid(p3)); | |
100 | ||
101 | p4 = utf8_escape_non_printable("ąę\n가너도루\n1234\n\341\204\341\204\n\001 \019\20\a"); | |
102 | puts(p4); | |
103 | assert_se(utf8_is_valid(p4)); | |
104 | ||
105 | p5 = utf8_escape_non_printable("\001 \019\20\a"); | |
106 | puts(p5); | |
107 | assert_se(utf8_is_valid(p5)); | |
3c6d3052 LP |
108 | |
109 | p6 = utf8_escape_non_printable("\xef\xbf\x30\x13"); | |
110 | puts(p6); | |
111 | assert_se(utf8_is_valid(p6)); | |
fec84576 WC |
112 | } |
113 | ||
da88f542 ZJS |
114 | static void test_utf8_escape_non_printable_full(void) { |
115 | log_info("/* %s */", __func__); | |
116 | ||
117 | for (size_t i = 0; i < 20; i++) { | |
118 | _cleanup_free_ char *p; | |
119 | ||
120 | p = utf8_escape_non_printable_full("goo goo goo", i); | |
121 | puts(p); | |
122 | assert_se(utf8_is_valid(p)); | |
123 | assert_se(utf8_console_width(p) <= i); | |
124 | } | |
125 | ||
126 | for (size_t i = 0; i < 20; i++) { | |
127 | _cleanup_free_ char *p; | |
128 | ||
129 | p = utf8_escape_non_printable_full("\001 \019\20\a", i); | |
130 | puts(p); | |
131 | assert_se(utf8_is_valid(p)); | |
132 | assert_se(utf8_console_width(p) <= i); | |
133 | } | |
134 | ||
135 | for (size_t i = 0; i < 20; i++) { | |
136 | _cleanup_free_ char *p; | |
137 | ||
138 | p = utf8_escape_non_printable_full("\xef\xbf\x30\x13", i); | |
139 | puts(p); | |
140 | assert_se(utf8_is_valid(p)); | |
141 | assert_se(utf8_console_width(p) <= i); | |
142 | } | |
143 | } | |
144 | ||
04166cb7 | 145 | static void test_utf16_to_utf8(void) { |
80b0a597 LP |
146 | const char16_t utf16[] = { htole16('a'), htole16(0xd800), htole16('b'), htole16(0xdc00), htole16('c'), htole16(0xd801), htole16(0xdc37) }; |
147 | static const char utf8[] = { 'a', 'b', 'c', 0xf0, 0x90, 0x90, 0xb7 }; | |
148 | _cleanup_free_ char16_t *b = NULL; | |
149 | _cleanup_free_ char *a = NULL; | |
04166cb7 | 150 | |
9743846e ZJS |
151 | log_info("/* %s */", __func__); |
152 | ||
80b0a597 LP |
153 | /* Convert UTF-16 to UTF-8, filtering embedded bad chars */ |
154 | a = utf16_to_utf8(utf16, sizeof(utf16)); | |
04166cb7 | 155 | assert_se(a); |
80b0a597 LP |
156 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); |
157 | ||
158 | /* Convert UTF-8 to UTF-16, and back */ | |
159 | b = utf8_to_utf16(utf8, sizeof(utf8)); | |
160 | assert_se(b); | |
04166cb7 TG |
161 | |
162 | free(a); | |
80b0a597 LP |
163 | a = utf16_to_utf8(b, char16_strlen(b) * 2); |
164 | assert_se(a); | |
165 | assert_se(strlen(a) == sizeof(utf8)); | |
166 | assert_se(memcmp(a, utf8, sizeof(utf8)) == 0); | |
04166cb7 TG |
167 | } |
168 | ||
/* Checks the code point counts reported by utf8_n_codepoints() for ASCII, accented Latin
 * text, a CJK character, the empty string and emoji, plus the (size_t) -1 result expected
 * for a lone lead byte. */
static void test_utf8_n_codepoints(void) {
        const char *ascii = "abc";
        const char *polish = "zażółcić gęślą jaźń";
        const char *cjk = "串";
        const char *emoji = "…👊🔪💐…";
        const char *lone_lead = "\xF1";

        log_info("/* %s */", __func__);

        assert_se(utf8_n_codepoints(ascii) == 3);
        assert_se(utf8_n_codepoints(polish) == 19);
        assert_se(utf8_n_codepoints(cjk) == 1);
        assert_se(utf8_n_codepoints("") == 0);
        assert_se(utf8_n_codepoints(emoji) == 5);
        assert_se(utf8_n_codepoints(lone_lead) == (size_t) -1);
}
179 | ||
/* Checks the widths reported by utf8_console_width(): the CJK character is expected to
 * count as 2 and the ellipsis-plus-emoji string as 8, while a lone lead byte is expected
 * to yield (size_t) -1. */
static void test_utf8_console_width(void) {
        const char *ascii = "abc";
        const char *polish = "zażółcić gęślą jaźń";
        const char *cjk = "串";
        const char *emoji = "…👊🔪💐…";
        const char *lone_lead = "\xF1";

        log_info("/* %s */", __func__);

        assert_se(utf8_console_width(ascii) == 3);
        assert_se(utf8_console_width(polish) == 19);
        assert_se(utf8_console_width(cjk) == 2);
        assert_se(utf8_console_width("") == 0);
        assert_se(utf8_console_width(emoji) == 8);
        assert_se(utf8_console_width(lone_lead) == (size_t) -1);
}
190 | ||
80b0a597 LP |
191 | static void test_utf8_to_utf16(void) { |
192 | const char *p; | |
193 | ||
9743846e ZJS |
194 | log_info("/* %s */", __func__); |
195 | ||
80b0a597 LP |
196 | FOREACH_STRING(p, |
197 | "abc", | |
198 | "zażółcić gęślą jaźń", | |
199 | "串", | |
200 | "", | |
201 | "…👊🔪💐…") { | |
202 | ||
203 | _cleanup_free_ char16_t *a = NULL; | |
204 | _cleanup_free_ char *b = NULL; | |
205 | ||
206 | a = utf8_to_utf16(p, strlen(p)); | |
207 | assert_se(a); | |
208 | ||
209 | b = utf16_to_utf8(a, char16_strlen(a) * 2); | |
210 | assert_se(b); | |
211 | assert_se(streq(p, b)); | |
212 | } | |
213 | } | |
214 | ||
02a36bc9 DR |
215 | int main(int argc, char *argv[]) { |
216 | test_utf8_is_valid(); | |
7991ac34 | 217 | test_utf8_is_printable(); |
e7363c59 | 218 | test_ascii_is_valid(); |
294a3121 | 219 | test_ascii_is_valid_n(); |
e7363c59 | 220 | test_utf8_encoded_valid_unichar(); |
9743846e ZJS |
221 | test_utf8_escape_invalid(); |
222 | test_utf8_escape_non_printable(); | |
da88f542 | 223 | test_utf8_escape_non_printable_full(); |
04166cb7 | 224 | test_utf16_to_utf8(); |
e2cbc803 | 225 | test_utf8_n_codepoints(); |
7c6c2e07 | 226 | test_utf8_console_width(); |
80b0a597 | 227 | test_utf8_to_utf16(); |
7991ac34 DR |
228 | |
229 | return 0; | |
02a36bc9 | 230 | } |