]>
Commit | Line | Data |
---|---|---|
f4fe72a4 | 1 | /* Test c8rtomb. |
dff8da6b | 2 | Copyright (C) 2022-2024 Free Software Foundation, Inc. |
f4fe72a4 TH |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, see | |
17 | <https://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #include <errno.h> | |
20 | #include <limits.h> | |
21 | #include <locale.h> | |
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include <string.h> | |
25 | #include <uchar.h> | |
26 | #include <wchar.h> | |
27 | #include <support/check.h> | |
28 | #include <support/support.h> | |
29 | ||
30 | static int | |
31 | test_truncated_code_unit_sequence (void) | |
32 | { | |
33 | /* Missing trailing code unit for a two code byte unit sequence. */ | |
34 | { | |
35 | const char8_t *u8s = (const char8_t*) u8"\xC2"; | |
36 | char buf[MB_LEN_MAX] = { 0 }; | |
37 | mbstate_t s = { 0 }; | |
38 | ||
39 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
40 | errno = 0; | |
41 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
42 | TEST_COMPARE (errno, EILSEQ); | |
43 | } | |
44 | ||
45 | /* Missing first trailing code unit for a three byte code unit sequence. */ | |
46 | { | |
47 | const char8_t *u8s = (const char8_t*) u8"\xE0"; | |
48 | char buf[MB_LEN_MAX] = { 0 }; | |
49 | mbstate_t s = { 0 }; | |
50 | ||
51 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
52 | errno = 0; | |
53 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
54 | TEST_COMPARE (errno, EILSEQ); | |
55 | } | |
56 | ||
57 | /* Missing second trailing code unit for a three byte code unit sequence. */ | |
58 | { | |
59 | const char8_t *u8s = (const char8_t*) u8"\xE0\xA0"; | |
60 | char buf[MB_LEN_MAX] = { 0 }; | |
61 | mbstate_t s = { 0 }; | |
62 | ||
63 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
64 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
65 | errno = 0; | |
66 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); | |
67 | TEST_COMPARE (errno, EILSEQ); | |
68 | } | |
69 | ||
70 | /* Missing first trailing code unit for a four byte code unit sequence. */ | |
71 | { | |
72 | const char8_t *u8s = (const char8_t*) u8"\xF0"; | |
73 | char buf[MB_LEN_MAX] = { 0 }; | |
74 | mbstate_t s = { 0 }; | |
75 | ||
76 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
77 | errno = 0; | |
78 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
79 | TEST_COMPARE (errno, EILSEQ); | |
80 | } | |
81 | ||
82 | /* Missing second trailing code unit for a four byte code unit sequence. */ | |
83 | { | |
84 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90"; | |
85 | char buf[MB_LEN_MAX] = { 0 }; | |
86 | mbstate_t s = { 0 }; | |
87 | ||
88 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
89 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
90 | errno = 0; | |
91 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); | |
92 | TEST_COMPARE (errno, EILSEQ); | |
93 | } | |
94 | ||
95 | /* Missing third trailing code unit for a four byte code unit sequence. */ | |
96 | { | |
97 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80"; | |
98 | char buf[MB_LEN_MAX] = { 0 }; | |
99 | mbstate_t s = { 0 }; | |
100 | ||
101 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
102 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
103 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
104 | errno = 0; | |
105 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1); | |
106 | TEST_COMPARE (errno, EILSEQ); | |
107 | } | |
108 | ||
109 | return 0; | |
110 | } | |
111 | ||
112 | static int | |
113 | test_invalid_trailing_code_unit_sequence (void) | |
114 | { | |
115 | /* Invalid trailing code unit for a two code byte unit sequence. */ | |
116 | { | |
117 | const char8_t *u8s = (const char8_t*) u8"\xC2\xC0"; | |
118 | char buf[MB_LEN_MAX] = { 0 }; | |
119 | mbstate_t s = { 0 }; | |
120 | ||
121 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
122 | errno = 0; | |
123 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
124 | TEST_COMPARE (errno, EILSEQ); | |
125 | } | |
126 | ||
127 | /* Invalid first trailing code unit for a three byte code unit sequence. */ | |
128 | { | |
129 | const char8_t *u8s = (const char8_t*) u8"\xE0\xC0"; | |
130 | char buf[MB_LEN_MAX] = { 0 }; | |
131 | mbstate_t s = { 0 }; | |
132 | ||
133 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
134 | errno = 0; | |
135 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
136 | TEST_COMPARE (errno, EILSEQ); | |
137 | } | |
138 | ||
139 | /* Invalid second trailing code unit for a three byte code unit sequence. */ | |
140 | { | |
141 | const char8_t *u8s = (const char8_t*) u8"\xE0\xA0\xC0"; | |
142 | char buf[MB_LEN_MAX] = { 0 }; | |
143 | mbstate_t s = { 0 }; | |
144 | ||
145 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
146 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
147 | errno = 0; | |
148 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); | |
149 | TEST_COMPARE (errno, EILSEQ); | |
150 | } | |
151 | ||
152 | /* Invalid first trailing code unit for a four byte code unit sequence. */ | |
153 | { | |
154 | const char8_t *u8s = (const char8_t*) u8"\xF0\xC0"; | |
155 | char buf[MB_LEN_MAX] = { 0 }; | |
156 | mbstate_t s = { 0 }; | |
157 | ||
158 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
159 | errno = 0; | |
160 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
161 | TEST_COMPARE (errno, EILSEQ); | |
162 | } | |
163 | ||
164 | /* Invalid second trailing code unit for a four byte code unit sequence. */ | |
165 | { | |
166 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\xC0"; | |
167 | char buf[MB_LEN_MAX] = { 0 }; | |
168 | mbstate_t s = { 0 }; | |
169 | ||
170 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
171 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
172 | errno = 0; | |
173 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) -1); | |
174 | TEST_COMPARE (errno, EILSEQ); | |
175 | } | |
176 | ||
177 | /* Invalid third trailing code unit for a four byte code unit sequence. */ | |
178 | { | |
179 | const char8_t *u8s = (const char8_t*) u8"\xF0\x90\x80\xC0"; | |
180 | char buf[MB_LEN_MAX] = { 0 }; | |
181 | mbstate_t s = { 0 }; | |
182 | ||
183 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
184 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
185 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
186 | errno = 0; | |
187 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) -1); | |
188 | TEST_COMPARE (errno, EILSEQ); | |
189 | } | |
190 | ||
191 | return 0; | |
192 | } | |
193 | ||
194 | static int | |
195 | test_lone_trailing_code_units (void) | |
196 | { | |
197 | /* Lone trailing code unit. */ | |
198 | const char8_t *u8s = (const char8_t*) u8"\x80"; | |
199 | char buf[MB_LEN_MAX] = { 0 }; | |
200 | mbstate_t s = { 0 }; | |
201 | ||
202 | errno = 0; | |
203 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); | |
204 | TEST_COMPARE (errno, EILSEQ); | |
205 | ||
206 | return 0; | |
207 | } | |
208 | ||
209 | static int | |
210 | test_overlong_encoding (void) | |
211 | { | |
212 | /* Two byte overlong encoding. */ | |
213 | { | |
214 | const char8_t *u8s = (const char8_t*) u8"\xC0\x80"; | |
215 | char buf[MB_LEN_MAX] = { 0 }; | |
216 | mbstate_t s = { 0 }; | |
217 | ||
218 | errno = 0; | |
219 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); | |
220 | TEST_COMPARE (errno, EILSEQ); | |
221 | } | |
222 | ||
223 | /* Two byte overlong encoding. */ | |
224 | { | |
225 | const char8_t *u8s = (const char8_t*) u8"\xC1\x80"; | |
226 | char buf[MB_LEN_MAX] = { 0 }; | |
227 | mbstate_t s = { 0 }; | |
228 | ||
229 | errno = 0; | |
230 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); | |
231 | TEST_COMPARE (errno, EILSEQ); | |
232 | } | |
233 | ||
234 | /* Three byte overlong encoding. */ | |
235 | { | |
236 | const char8_t *u8s = (const char8_t*) u8"\xE0\x9F\xBF"; | |
237 | char buf[MB_LEN_MAX] = { 0 }; | |
238 | mbstate_t s = { 0 }; | |
239 | ||
240 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
241 | errno = 0; | |
242 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
243 | TEST_COMPARE (errno, EILSEQ); | |
244 | } | |
245 | ||
246 | /* Four byte overlong encoding. */ | |
247 | { | |
248 | const char8_t *u8s = (const char8_t*) u8"\xF0\x8F\xBF\xBF"; | |
249 | char buf[MB_LEN_MAX] = { 0 }; | |
250 | mbstate_t s = { 0 }; | |
251 | ||
252 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
253 | errno = 0; | |
254 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
255 | TEST_COMPARE (errno, EILSEQ); | |
256 | } | |
257 | ||
258 | return 0; | |
259 | } | |
260 | ||
261 | static int | |
262 | test_surrogate_range (void) | |
263 | { | |
264 | /* Would encode U+D800. */ | |
265 | { | |
266 | const char8_t *u8s = (const char8_t*) u8"\xED\xA0\x80"; | |
267 | char buf[MB_LEN_MAX] = { 0 }; | |
268 | mbstate_t s = { 0 }; | |
269 | ||
270 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
271 | errno = 0; | |
272 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
273 | TEST_COMPARE (errno, EILSEQ); | |
274 | } | |
275 | ||
276 | /* Would encode U+DFFF. */ | |
277 | { | |
278 | const char8_t *u8s = (const char8_t*) u8"\xED\xBF\xBF"; | |
279 | char buf[MB_LEN_MAX] = { 0 }; | |
280 | mbstate_t s = { 0 }; | |
281 | ||
282 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
283 | errno = 0; | |
284 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
285 | TEST_COMPARE (errno, EILSEQ); | |
286 | } | |
287 | ||
288 | return 0; | |
289 | } | |
290 | ||
291 | static int | |
292 | test_out_of_range_encoding (void) | |
293 | { | |
294 | /* Would encode U+00110000. */ | |
295 | { | |
296 | const char8_t *u8s = (const char8_t*) u8"\xF4\x90\x80\x80"; | |
297 | char buf[MB_LEN_MAX] = { 0 }; | |
298 | mbstate_t s = { 0 }; | |
299 | ||
300 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
301 | errno = 0; | |
302 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) -1); | |
303 | TEST_COMPARE (errno, EILSEQ); | |
304 | } | |
305 | ||
306 | /* Would encode U+00140000. */ | |
307 | { | |
308 | const char8_t *u8s = (const char8_t*) u8"\xF5\x90\x80\x80"; | |
309 | char buf[MB_LEN_MAX] = { 0 }; | |
310 | mbstate_t s = { 0 }; | |
311 | ||
312 | errno = 0; | |
313 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) -1); | |
314 | TEST_COMPARE (errno, EILSEQ); | |
315 | } | |
316 | ||
317 | return 0; | |
318 | } | |
319 | ||
/* Exercise c8rtomb when the output buffer argument is NULL, both from an
   initial conversion state and from a state that holds a partially read
   code unit sequence.  Always returns 0.  */
static int
test_null_output_buffer (void)
{
  /* Null buffer, conversion state initial.  The call is expected to return
     1 and to leave the state initial.  */
  {
    mbstate_t state = { 0 };

    TEST_COMPARE (c8rtomb (NULL, u8"X"[0], &state), (size_t) 1);
    /* The state must still describe the initial state.  */
    TEST_VERIFY (mbsinit (&state));
  }

  /* Null buffer while a code unit sequence is only partly read.  The
     expected outcome is an error: there is not enough information to
     finish the sequence already begun and get back to the initial
     state.  */
  {
    mbstate_t state = { 0 };
    char mb[MB_LEN_MAX] = { 0 };

    TEST_COMPARE (c8rtomb (mb, u8"\xC2"[0], &state), (size_t) 0);
    errno = 0;
    TEST_COMPARE (c8rtomb (NULL, u8"\x80"[0], &state), (size_t) -1);
    TEST_COMPARE (errno, EILSEQ);
  }

  return 0;
}
348 | ||
349 | static int | |
350 | test_utf8 (void) | |
351 | { | |
352 | xsetlocale (LC_ALL, "de_DE.UTF-8"); | |
353 | ||
354 | /* Null character. */ | |
355 | { | |
356 | /* U+0000 => 0x00 */ | |
357 | const char8_t *u8s = (const char8_t*) u8"\x00"; | |
358 | char buf[MB_LEN_MAX] = { 0 }; | |
359 | mbstate_t s = { 0 }; | |
360 | ||
361 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); | |
362 | TEST_COMPARE (buf[0], (char) 0x00); | |
363 | TEST_VERIFY (mbsinit (&s)); | |
364 | } | |
365 | ||
366 | /* First non-null character in the code point range that maps to a single | |
367 | code unit. */ | |
368 | { | |
369 | /* U+0001 => 0x01 */ | |
370 | const char8_t *u8s = (const char8_t*) u8"\x01"; | |
371 | char buf[MB_LEN_MAX] = { 0 }; | |
372 | mbstate_t s = { 0 }; | |
373 | ||
374 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); | |
375 | TEST_COMPARE (buf[0], (char) 0x01); | |
376 | TEST_VERIFY (mbsinit (&s)); | |
377 | } | |
378 | ||
379 | /* Last character in the code point range that maps to a single code unit. */ | |
380 | { | |
381 | /* U+007F => 0x7F */ | |
382 | const char8_t *u8s = (const char8_t*) u8"\x7F"; | |
383 | char buf[MB_LEN_MAX] = { 0 }; | |
384 | mbstate_t s = { 0 }; | |
385 | ||
386 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 1); | |
387 | TEST_COMPARE (buf[0], (char) 0x7F); | |
388 | TEST_VERIFY (mbsinit (&s)); | |
389 | } | |
390 | ||
391 | /* First character in the code point range that maps to two code units. */ | |
392 | { | |
393 | /* U+0080 => 0xC2 0x80 */ | |
394 | const char8_t *u8s = (const char8_t*) u8"\xC2\x80"; | |
395 | char buf[MB_LEN_MAX] = { 0 }; | |
396 | mbstate_t s = { 0 }; | |
397 | ||
398 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
399 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2); | |
400 | TEST_COMPARE (buf[0], (char) 0xC2); | |
401 | TEST_COMPARE (buf[1], (char) 0x80); | |
402 | TEST_VERIFY (mbsinit (&s)); | |
403 | } | |
404 | ||
405 | /* Last character in the code point range that maps to two code units. */ | |
406 | { | |
407 | /* U+07FF => 0xDF 0xBF */ | |
408 | const char8_t *u8s = (const char8_t*) u8"\u07FF"; | |
409 | char buf[MB_LEN_MAX] = { 0 }; | |
410 | mbstate_t s = { 0 }; | |
411 | ||
412 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
413 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 2); | |
414 | TEST_COMPARE (buf[0], (char) 0xDF); | |
415 | TEST_COMPARE (buf[1], (char) 0xBF); | |
416 | TEST_VERIFY (mbsinit (&s)); | |
417 | } | |
418 | ||
419 | /* First character in the code point range that maps to three code units. */ | |
420 | { | |
421 | /* U+0800 => 0xE0 0xA0 0x80 */ | |
422 | const char8_t *u8s = (const char8_t*) u8"\u0800"; | |
423 | char buf[MB_LEN_MAX] = { 0 }; | |
424 | mbstate_t s = { 0 }; | |
425 | ||
426 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
427 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
428 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
429 | TEST_COMPARE (buf[0], (char) 0xE0); | |
430 | TEST_COMPARE (buf[1], (char) 0xA0); | |
431 | TEST_COMPARE (buf[2], (char) 0x80); | |
432 | TEST_VERIFY (mbsinit (&s)); | |
433 | } | |
434 | ||
435 | /* Last character in the code point range that maps to three code units | |
436 | before the surrogate code point range. */ | |
437 | { | |
438 | /* U+D7FF => 0xED 0x9F 0xBF */ | |
439 | const char8_t *u8s = (const char8_t*) u8"\uD7FF"; | |
440 | char buf[MB_LEN_MAX] = { 0 }; | |
441 | mbstate_t s = { 0 }; | |
442 | ||
443 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
444 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
445 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
446 | TEST_COMPARE (buf[0], (char) 0xED); | |
447 | TEST_COMPARE (buf[1], (char) 0x9F); | |
448 | TEST_COMPARE (buf[2], (char) 0xBF); | |
449 | TEST_VERIFY (mbsinit (&s)); | |
450 | } | |
451 | ||
452 | /* First character in the code point range that maps to three code units | |
453 | after the surrogate code point range. */ | |
454 | { | |
455 | /* U+E000 => 0xEE 0x80 0x80 */ | |
456 | const char8_t *u8s = (const char8_t*) u8"\uE000"; | |
457 | char buf[MB_LEN_MAX] = { 0 }; | |
458 | mbstate_t s = { 0 }; | |
459 | ||
460 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
461 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
462 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
463 | TEST_COMPARE (buf[0], (char) 0xEE); | |
464 | TEST_COMPARE (buf[1], (char) 0x80); | |
465 | TEST_COMPARE (buf[2], (char) 0x80); | |
466 | TEST_VERIFY (mbsinit (&s)); | |
467 | } | |
468 | ||
469 | /* Not a BOM. */ | |
470 | { | |
471 | /* U+FEFF => 0xEF 0xBB 0xBF */ | |
472 | const char8_t *u8s = (const char8_t*) u8"\uFEFF"; | |
473 | char buf[MB_LEN_MAX] = { 0 }; | |
474 | mbstate_t s = { 0 }; | |
475 | ||
476 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
477 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
478 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
479 | TEST_COMPARE (buf[0], (char) 0xEF); | |
480 | TEST_COMPARE (buf[1], (char) 0xBB); | |
481 | TEST_COMPARE (buf[2], (char) 0xBF); | |
482 | TEST_VERIFY (mbsinit (&s)); | |
483 | } | |
484 | ||
485 | /* Replacement character. */ | |
486 | { | |
487 | /* U+FFFD => 0xEF 0xBF 0xBD */ | |
488 | const char8_t *u8s = (const char8_t*) u8"\uFFFD"; | |
489 | char buf[MB_LEN_MAX] = { 0 }; | |
490 | mbstate_t s = { 0 }; | |
491 | ||
492 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
493 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
494 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
495 | TEST_COMPARE (buf[0], (char) 0xEF); | |
496 | TEST_COMPARE (buf[1], (char) 0xBF); | |
497 | TEST_COMPARE (buf[2], (char) 0xBD); | |
498 | TEST_VERIFY (mbsinit (&s)); | |
499 | } | |
500 | ||
501 | /* Last character in the code point range that maps to three code units. */ | |
502 | { | |
503 | /* U+FFFF => 0xEF 0xBF 0xBF */ | |
504 | const char8_t *u8s = (const char8_t*) u8"\uFFFF"; | |
505 | char buf[MB_LEN_MAX] = { 0 }; | |
506 | mbstate_t s = { 0 }; | |
507 | ||
508 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
509 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
510 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 3); | |
511 | TEST_COMPARE (buf[0], (char) 0xEF); | |
512 | TEST_COMPARE (buf[1], (char) 0xBF); | |
513 | TEST_COMPARE (buf[2], (char) 0xBF); | |
514 | TEST_VERIFY (mbsinit (&s)); | |
515 | } | |
516 | ||
517 | /* First character in the code point range that maps to four code units. */ | |
518 | { | |
519 | /* U+10000 => 0xF0 0x90 0x80 0x80 */ | |
520 | const char8_t *u8s = (const char8_t*) u8"\U00010000"; | |
521 | char buf[MB_LEN_MAX] = { 0 }; | |
522 | mbstate_t s = { 0 }; | |
523 | ||
524 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
525 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
526 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
527 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4); | |
528 | TEST_COMPARE (buf[0], (char) 0xF0); | |
529 | TEST_COMPARE (buf[1], (char) 0x90); | |
530 | TEST_COMPARE (buf[2], (char) 0x80); | |
531 | TEST_COMPARE (buf[3], (char) 0x80); | |
532 | TEST_VERIFY (mbsinit (&s)); | |
533 | } | |
534 | ||
535 | /* Last character in the code point range that maps to four code units. */ | |
536 | { | |
537 | /* U+10FFFF => 0xF4 0x8F 0xBF 0xBF */ | |
538 | const char8_t *u8s = (const char8_t*) u8"\U0010FFFF"; | |
539 | char buf[MB_LEN_MAX] = { 0 }; | |
540 | mbstate_t s = { 0 }; | |
541 | ||
542 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
543 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
544 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
545 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 4); | |
546 | TEST_COMPARE (buf[0], (char) 0xF4); | |
547 | TEST_COMPARE (buf[1], (char) 0x8F); | |
548 | TEST_COMPARE (buf[2], (char) 0xBF); | |
549 | TEST_COMPARE (buf[3], (char) 0xBF); | |
550 | TEST_VERIFY (mbsinit (&s)); | |
551 | } | |
552 | ||
553 | return 0; | |
554 | } | |
555 | ||
556 | static int | |
557 | test_big5_hkscs (void) | |
558 | { | |
559 | xsetlocale (LC_ALL, "zh_HK.BIG5-HKSCS"); | |
560 | ||
561 | /* A pair of two byte UTF-8 code unit sequences that map a Unicode code | |
562 | point and combining character to a single double byte character. */ | |
563 | { | |
564 | /* U+00CA U+0304 => 0x88 0x62 */ | |
565 | const char8_t *u8s = (const char8_t*) u8"\u00CA\u0304"; | |
566 | char buf[MB_LEN_MAX] = { 0 }; | |
567 | mbstate_t s = { 0 }; | |
568 | ||
569 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
570 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
571 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
572 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2); | |
573 | TEST_COMPARE (buf[0], (char) 0x88); | |
574 | TEST_COMPARE (buf[1], (char) 0x62); | |
575 | TEST_VERIFY (mbsinit (&s)); | |
576 | } | |
577 | ||
578 | /* Another pair of two byte UTF-8 code unit sequences that map a Unicode code | |
579 | point and combining character to a single double byte character. */ | |
580 | { | |
581 | /* U+00EA U+030C => 0x88 0xA5 */ | |
582 | const char8_t *u8s = (const char8_t*) u8"\u00EA\u030C"; | |
583 | char buf[MB_LEN_MAX] = { 0 }; | |
584 | mbstate_t s = { 0 }; | |
585 | ||
586 | TEST_COMPARE (c8rtomb (buf, u8s[0], &s), (size_t) 0); | |
587 | TEST_COMPARE (c8rtomb (buf, u8s[1], &s), (size_t) 0); | |
588 | TEST_COMPARE (c8rtomb (buf, u8s[2], &s), (size_t) 0); | |
589 | TEST_COMPARE (c8rtomb (buf, u8s[3], &s), (size_t) 2); | |
590 | TEST_COMPARE (buf[0], (char) 0x88); | |
591 | TEST_COMPARE (buf[1], (char) 0xA5); | |
592 | TEST_VERIFY (mbsinit (&s)); | |
593 | } | |
594 | ||
595 | return 0; | |
596 | } | |
597 | ||
598 | static int | |
599 | do_test (void) | |
600 | { | |
601 | test_truncated_code_unit_sequence (); | |
602 | test_invalid_trailing_code_unit_sequence (); | |
603 | test_lone_trailing_code_units (); | |
604 | test_overlong_encoding (); | |
605 | test_surrogate_range (); | |
606 | test_out_of_range_encoding (); | |
607 | test_null_output_buffer (); | |
608 | test_utf8 (); | |
609 | test_big5_hkscs (); | |
610 | return 0; | |
611 | } | |
612 | ||
613 | #include <support/test-driver.c> |