/*
 * Internationalization test for CUPS.
 *
 * Copyright © 2020-2024 by OpenPrinting.
 * Copyright © 2007-2014 by Apple Inc.
 * Copyright © 1997-2006 by Easy Software Products.
 *
 * Licensed under Apache License v2.0. See the file "LICENSE" for more
 * information.
 */

/*
 * Include necessary headers...
 */

#include "cups.h"
#include "string-private.h"
#include "language-private.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>


/*
 * Local globals...
 */

/*
 * Character set names.  main() converts the index of a matching entry
 * directly to a cups_encoding_t value, so the order and the "unknown"
 * placeholders must be preserved exactly.
 */

static const char * const lang_encodings[] =
{					/* Encoding strings */
  "us-ascii",     "iso-8859-1",		/*   0 -   1 */
  "iso-8859-2",   "iso-8859-3",		/*   2 -   3 */
  "iso-8859-4",   "iso-8859-5",		/*   4 -   5 */
  "iso-8859-6",   "iso-8859-7",		/*   6 -   7 */
  "iso-8859-8",   "iso-8859-9",		/*   8 -   9 */
  "iso-8859-10",  "utf-8",		/*  10 -  11 */
  "iso-8859-13",  "iso-8859-14",	/*  12 -  13 */
  "iso-8859-15",  "windows-874",	/*  14 -  15 */
  "windows-1250", "windows-1251",	/*  16 -  17 */
  "windows-1252", "windows-1253",	/*  18 -  19 */
  "windows-1254", "windows-1255",	/*  20 -  21 */
  "windows-1256", "windows-1257",	/*  22 -  23 */
  "windows-1258", "koi8-r",		/*  24 -  25 */
  "koi8-u",       "iso-8859-11",	/*  26 -  27 */
  "iso-8859-16",  "mac-roman",		/*  28 -  29 */

  "unknown", "unknown", "unknown", "unknown",	/*  30 -  33 */
  "unknown", "unknown", "unknown", "unknown",	/*  34 -  37 */
  "unknown", "unknown", "unknown", "unknown",	/*  38 -  41 */
  "unknown", "unknown", "unknown", "unknown",	/*  42 -  45 */
  "unknown", "unknown", "unknown", "unknown",	/*  46 -  49 */
  "unknown", "unknown", "unknown", "unknown",	/*  50 -  53 */
  "unknown", "unknown", "unknown", "unknown",	/*  54 -  57 */
  "unknown", "unknown", "unknown", "unknown",	/*  58 -  61 */
  "unknown", "unknown",				/*  62 -  63 */

  "windows-932",  "windows-936",	/*  64 -  65 */
  "windows-949",  "windows-950",	/*  66 -  67 */
  "windows-1361",			/*  68 */

  "unknown", "unknown", "unknown",		/*  69 -  71 */
  "unknown", "unknown", "unknown", "unknown",	/*  72 -  75 */
  "unknown", "unknown", "unknown", "unknown",	/*  76 -  79 */
  "unknown", "unknown", "unknown", "unknown",	/*  80 -  83 */
  "unknown", "unknown", "unknown", "unknown",	/*  84 -  87 */
  "unknown", "unknown", "unknown", "unknown",	/*  88 -  91 */
  "unknown", "unknown", "unknown", "unknown",	/*  92 -  95 */
  "unknown", "unknown", "unknown", "unknown",	/*  96 -  99 */
  "unknown", "unknown", "unknown", "unknown",	/* 100 - 103 */
  "unknown", "unknown", "unknown", "unknown",	/* 104 - 107 */
  "unknown", "unknown", "unknown", "unknown",	/* 108 - 111 */
  "unknown", "unknown", "unknown", "unknown",	/* 112 - 115 */
  "unknown", "unknown", "unknown", "unknown",	/* 116 - 119 */
  "unknown", "unknown", "unknown", "unknown",	/* 120 - 123 */
  "unknown", "unknown", "unknown", "unknown",	/* 124 - 127 */

  "euc-cn",       "euc-jp",		/* 128 - 129 */
  "euc-kr",       "euc-tw",		/* 130 - 131 */
  "jis-x0213"				/* 132 */
};


ef416fc2 100/*
101 * Local functions...
102 */
103
ef416fc2 104static void print_utf8(const char *msg, const cups_utf8_t *src);
ef416fc2 105
106
107/*
108 * 'main()' - Main entry for internationalization test module.
109 */
110
111int /* O - Exit code */
112main(int argc, /* I - Argument Count */
113 char *argv[]) /* I - Arguments */
ef416fc2 114{
115 FILE *fp; /* File pointer */
116 int count; /* File line counter */
e1d6a774 117 int status, /* Status of current test */
118 errors; /* Error count */
ef416fc2 119 char line[1024]; /* File line source string */
120 int len; /* Length (count) of string */
e1d6a774 121 char legsrc[1024], /* Legacy source string */
122 legdest[1024], /* Legacy destination string */
123 *legptr; /* Pointer into legacy string */
ef416fc2 124 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */
125 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
126 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
127 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */
128 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
129 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
130 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */
131 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
e1d6a774 132 /* "A != <ALPHA>." - use ISO 8859-7 */
ef416fc2 133 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */
134 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
135 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
136 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */
137 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
138 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
ef416fc2 139 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */
ef416fc2 140 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */
ef416fc2 141
142
bf3816c7
MS
143 if (argc > 1)
144 {
145 int i; /* Looping var */
146 cups_encoding_t encoding; /* Source encoding */
147
148
149 if (argc != 3)
150 {
151 puts("Usage: ./testi18n [filename charset]");
152 return (1);
153 }
154
155 if ((fp = fopen(argv[1], "rb")) == NULL)
156 {
157 perror(argv[1]);
158 return (1);
159 }
160
161 for (i = 0, encoding = CUPS_AUTO_ENCODING;
162 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
163 i ++)
88f9aafc 164 if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
bf3816c7
MS
165 {
166 encoding = (cups_encoding_t)i;
167 break;
168 }
169
170 if (encoding == CUPS_AUTO_ENCODING)
171 {
172 fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
c2f79527 173 fclose(fp);
bf3816c7
MS
174 return (1);
175 }
176
177 while (fgets(line, sizeof(line), fp))
178 {
179 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
180 {
181 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
c2f79527 182 fclose(fp);
bf3816c7
MS
183 return (1);
184 }
185
186 fputs((char *)utf8dest, stdout);
187 }
188
189 fclose(fp);
190 return (0);
191 }
192
ef416fc2 193 /*
e1d6a774 194 * Start with some conversion tests from a UTF-8 test file.
ef416fc2 195 */
196
e1d6a774 197 errors = 0;
198
bf3816c7 199 if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
ef416fc2 200 {
e1d6a774 201 perror("utf8demo.txt");
202 return (1);
ef416fc2 203 }
204
205 /*
e1d6a774 206 * cupsUTF8ToUTF32
ef416fc2 207 */
208
e1d6a774 209 fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
210
211 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
ef416fc2 212 {
e1d6a774 213 count ++;
214
215 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
216 {
217 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
218 errors ++;
219 status = 1;
220 break;
221 }
ef416fc2 222 }
223
e1d6a774 224 if (!status)
225 puts("PASS");
ef416fc2 226
e1d6a774 227 /*
228 * cupsUTF8ToCharset(CUPS_EUC_JP)
229 */
ef416fc2 230
e1d6a774 231 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
ef416fc2 232
e1d6a774 233 rewind(fp);
234
235 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
236 {
237 count ++;
ef416fc2 238
239 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
240 if (len < 0)
e1d6a774 241 {
242 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
243 errors ++;
244 status = 1;
245 break;
246 }
ef416fc2 247 }
248
e1d6a774 249 if (!status)
250 puts("PASS");
251
ef416fc2 252 fclose(fp);
253
ef416fc2 254 /*
255 * Test UTF-8 to legacy charset (ISO 8859-1)...
256 */
257
e1d6a774 258 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
ef416fc2 259
260 legdest[0] = 0;
261
262 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
263 if (len < 0)
ef416fc2 264 {
e1d6a774 265 printf("FAIL (len=%d)\n", len);
266 errors ++;
ef416fc2 267 }
e1d6a774 268 else
269 puts("PASS");
ef416fc2 270
e1d6a774 271 /*
272 * cupsCharsetToUTF8
273 */
ef416fc2 274
e1d6a774 275 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
ef416fc2 276
6ac4da6b 277 cupsCopyString(legsrc, legdest, sizeof(legsrc));
ef416fc2 278
e1d6a774 279 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
7e86f2f6 280 if ((size_t)len != strlen((char *)utf8latin))
e1d6a774 281 {
89d46774 282 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
e1d6a774 283 print_utf8(" utf8latin", utf8latin);
284 print_utf8(" utf8dest", utf8dest);
285 errors ++;
286 }
7e86f2f6 287 else if (memcmp(utf8latin, utf8dest, (size_t)len))
ef416fc2 288 {
e1d6a774 289 puts("FAIL (results do not match)");
290 print_utf8(" utf8latin", utf8latin);
291 print_utf8(" utf8dest", utf8dest);
292 errors ++;
ef416fc2 293 }
e1d6a774 294 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
295 {
296 puts("FAIL (replacement characters do not work!)");
297 errors ++;
298 }
299 else
300 puts("PASS");
ef416fc2 301
302 /*
e1d6a774 303 * Test UTF-8 to/from legacy charset (ISO 8859-7)...
ef416fc2 304 */
e1d6a774 305
306 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
307
308 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
ef416fc2 309 {
e1d6a774 310 puts("FAIL");
311 errors ++;
ef416fc2 312 }
e1d6a774 313 else
314 {
315 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
316
317 if (*legptr)
318 {
319 puts("FAIL (unknown character)");
320 errors ++;
321 }
322 else
323 puts("PASS");
324 }
325
326 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
327
6ac4da6b 328 cupsCopyString(legsrc, legdest, sizeof(legsrc));
e1d6a774 329
ef416fc2 330 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
7e86f2f6 331 if ((size_t)len != strlen((char *)utf8greek))
e1d6a774 332 {
89d46774 333 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
e1d6a774 334 print_utf8(" utf8greek", utf8greek);
335 print_utf8(" utf8dest", utf8dest);
336 errors ++;
337 }
7e86f2f6 338 else if (memcmp(utf8greek, utf8dest, (size_t)len))
e1d6a774 339 {
340 puts("FAIL (results do not match)");
341 print_utf8(" utf8greek", utf8greek);
342 print_utf8(" utf8dest", utf8dest);
343 errors ++;
344 }
345 else
346 puts("PASS");
ef416fc2 347
348 /*
e1d6a774 349 * Test UTF-8 to/from legacy charset (Windows 932)...
ef416fc2 350 */
e1d6a774 351
352 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
353
354 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
ef416fc2 355 {
e1d6a774 356 puts("FAIL");
357 errors ++;
ef416fc2 358 }
e1d6a774 359 else
360 {
361 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
362
363 if (*legptr)
364 {
365 puts("FAIL (unknown character)");
366 errors ++;
367 }
368 else
369 puts("PASS");
370 }
371
372 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
373
6ac4da6b 374 cupsCopyString(legsrc, legdest, sizeof(legsrc));
e1d6a774 375
ef416fc2 376 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
7e86f2f6 377 if ((size_t)len != strlen((char *)utf8japan))
e1d6a774 378 {
89d46774 379 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
e1d6a774 380 print_utf8(" utf8japan", utf8japan);
381 print_utf8(" utf8dest", utf8dest);
382 errors ++;
383 }
7e86f2f6 384 else if (memcmp(utf8japan, utf8dest, (size_t)len))
e1d6a774 385 {
386 puts("FAIL (results do not match)");
387 print_utf8(" utf8japan", utf8japan);
388 print_utf8(" utf8dest", utf8dest);
389 errors ++;
390 }
391 else
392 puts("PASS");
ef416fc2 393
394 /*
e1d6a774 395 * Test UTF-8 to/from legacy charset (EUC-JP)...
ef416fc2 396 */
e1d6a774 397
398 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
399
400 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
ef416fc2 401 {
e1d6a774 402 puts("FAIL");
403 errors ++;
ef416fc2 404 }
e1d6a774 405 else
406 {
407 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
408
409 if (*legptr)
410 {
411 puts("FAIL (unknown character)");
412 errors ++;
413 }
414 else
415 puts("PASS");
416 }
417
08e1d3f1 418#if 0 /* Failing and not sure why, might be an iconv issue? */
e1d6a774 419 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
420
6ac4da6b 421 cupsCopyString(legsrc, legdest, sizeof(legsrc));
e1d6a774 422
ef416fc2 423 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
7e86f2f6 424 if ((size_t)len != strlen((char *)utf8japan))
e1d6a774 425 {
89d46774 426 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
e1d6a774 427 print_utf8(" utf8japan", utf8japan);
428 print_utf8(" utf8dest", utf8dest);
429 errors ++;
430 }
7e86f2f6 431 else if (memcmp(utf8japan, utf8dest, (size_t)len))
e1d6a774 432 {
433 puts("FAIL (results do not match)");
434 print_utf8(" utf8japan", utf8japan);
435 print_utf8(" utf8dest", utf8dest);
436 errors ++;
437 }
438 else
439 puts("PASS");
08e1d3f1 440#endif /* 0 */
ef416fc2 441
442 /*
e1d6a774 443 * Test UTF-8 to/from legacy charset (Windows 950)...
ef416fc2 444 */
e1d6a774 445
446 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
447
448 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
449 {
450 puts("FAIL");
451 errors ++;
452 }
453 else
ef416fc2 454 {
e1d6a774 455 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
456
457 if (*legptr)
458 {
459 puts("FAIL (unknown character)");
460 errors ++;
461 }
462 else
463 puts("PASS");
ef416fc2 464 }
e1d6a774 465
466 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
467
6ac4da6b 468 cupsCopyString(legsrc, legdest, sizeof(legsrc));
e1d6a774 469
ef416fc2 470 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
7e86f2f6 471 if ((size_t)len != strlen((char *)utf8taiwan))
e1d6a774 472 {
89d46774 473 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
e1d6a774 474 print_utf8(" utf8taiwan", utf8taiwan);
475 print_utf8(" utf8dest", utf8dest);
476 errors ++;
477 }
7e86f2f6 478 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
e1d6a774 479 {
480 puts("FAIL (results do not match)");
481 print_utf8(" utf8taiwan", utf8taiwan);
482 print_utf8(" utf8dest", utf8dest);
483 errors ++;
484 }
485 else
486 puts("PASS");
ef416fc2 487
488 /*
e1d6a774 489 * Test UTF-8 to/from legacy charset (EUC-TW)...
ef416fc2 490 */
e1d6a774 491
492 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
493
494 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
495 {
496 puts("FAIL");
497 errors ++;
498 }
499 else
ef416fc2 500 {
e1d6a774 501 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
502
503 if (*legptr)
504 {
505 puts("FAIL (unknown character)");
506 errors ++;
507 }
508 else
509 puts("PASS");
ef416fc2 510 }
e1d6a774 511
512 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
513
6ac4da6b 514 cupsCopyString(legsrc, legdest, sizeof(legsrc));
e1d6a774 515
ef416fc2 516 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
7e86f2f6 517 if ((size_t)len != strlen((char *)utf8taiwan))
e1d6a774 518 {
89d46774 519 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
e1d6a774 520 print_utf8(" utf8taiwan", utf8taiwan);
521 print_utf8(" utf8dest", utf8dest);
522 errors ++;
523 }
7e86f2f6 524 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
e1d6a774 525 {
526 puts("FAIL (results do not match)");
527 print_utf8(" utf8taiwan", utf8taiwan);
528 print_utf8(" utf8dest", utf8dest);
529 errors ++;
530 }
531 else
532 puts("PASS");
ef416fc2 533
e1d6a774 534#if 0
ef416fc2 535 /*
536 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
537 */
538 if (verbose)
539 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
540 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
541 if (len < 0)
542 return (1);
543 if (verbose)
544 {
545 print_utf8(" utf8good ", utf8good);
546 print_utf32(" utf32dest", utf32dest);
547 }
7e86f2f6 548 memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
ef416fc2 549 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
550 if (len < 0)
551 return (1);
552 if (len != strlen ((char *) utf8good))
553 return (1);
554 if (memcmp(utf8good, utf8dest, len) != 0)
555 return (1);
556
557 /*
558 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
559 */
560 if (verbose)
561 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
562 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
563 if (len >= 0)
564 return (1);
565 if (verbose)
566 print_utf8(" utf8bad ", utf8bad);
567
568 /*
e1d6a774 569 * Test _cupsCharmapFlush()...
ef416fc2 570 */
571 if (verbose)
e1d6a774 572 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
573 _cupsCharmapFlush();
ef416fc2 574 return (0);
e1d6a774 575#endif /* 0 */
576
577 return (errors > 0);
ef416fc2 578}
579
580
581/*
e1d6a774 582 * 'print_utf8()' - Print UTF-8 string with (optional) message.
ef416fc2 583 */
584
e1d6a774 585static void
586print_utf8(const char *msg, /* I - Message String */
587 const cups_utf8_t *src) /* I - UTF-8 Source String */
ef416fc2 588{
58dc1933
MS
589 const char *prefix; /* Prefix string */
590
591
e1d6a774 592 if (msg)
593 printf("%s:", msg);
ef416fc2 594
58dc1933
MS
595 for (prefix = " "; *src; src ++)
596 {
597 printf("%s%02x", prefix, *src);
598
599 if ((src[0] & 0x80) && (src[1] & 0x80))
600 prefix = "";
601 else
602 prefix = " ";
603 }
ef416fc2 604
e1d6a774 605 putchar('\n');
606}