]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/testi18n.c
Merge changes from CUPS 1.5svn-r9049 (private header support)
[thirdparty/cups.git] / cups / testi18n.c
1 /*
2 * "$Id: testi18n.c 7560 2008-05-13 06:34:04Z mike $"
3 *
4 * Internationalization test for CUPS.
5 *
6 * Copyright 2007-2010 by Apple Inc.
7 * Copyright 1997-2006 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * This file is subject to the Apple OS-Developed Software exception.
16 *
17 * Contents:
18 *
19 * main() - Main entry for internationalization test module.
20 * print_utf8() - Print UTF-8 string with (optional) message.
21 */
22
23 /*
24 * Include necessary headers...
25 */
26
27 #include "string-private.h"
28 #include "language-private.h"
29 #include <stdlib.h>
30 #include <time.h>
31 #include <unistd.h>
32
33
34 /*
35 * Local globals...
36 */
37
/*
 * Charset names accepted on the command line.  The position of a name in
 * this table is the value main() casts to cups_encoding_t, so entries must
 * never be reordered; "unknown" fills the unassigned positions.  The
 * trailing comment on each row gives the index of its first entry.
 */

static const char * const lang_encodings[] =
{
  "us-ascii",     "iso-8859-1",   "iso-8859-2",   "iso-8859-3",    /*   0 */
  "iso-8859-4",   "iso-8859-5",   "iso-8859-6",   "iso-8859-7",    /*   4 */
  "iso-8859-8",   "iso-8859-9",   "iso-8859-10",  "utf-8",         /*   8 */
  "iso-8859-13",  "iso-8859-14",  "iso-8859-15",  "windows-874",   /*  12 */
  "windows-1250", "windows-1251", "windows-1252", "windows-1253",  /*  16 */
  "windows-1254", "windows-1255", "windows-1256", "windows-1257",  /*  20 */
  "windows-1258", "koi8-r",       "koi8-u",       "iso-8859-11",   /*  24 */
  "iso-8859-16",  "mac-roman",    "unknown",      "unknown",       /*  28 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  32 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  36 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  40 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  44 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  48 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  52 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  56 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  60 */
  "windows-932",  "windows-936",  "windows-949",  "windows-950",   /*  64 */
  "windows-1361", "unknown",      "unknown",      "unknown",       /*  68 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  72 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  76 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  80 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  84 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  88 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  92 */
  "unknown",      "unknown",      "unknown",      "unknown",       /*  96 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 100 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 104 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 108 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 112 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 116 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 120 */
  "unknown",      "unknown",      "unknown",      "unknown",       /* 124 */
  "euc-cn",       "euc-jp",       "euc-kr",       "euc-tw",        /* 128 */
  "jis-x0213"                                                      /* 132 */
};
108
109
110 /*
111 * Local functions...
112 */
113
114 static void print_utf8(const char *msg, const cups_utf8_t *src);
115
116
117 /*
118 * 'main()' - Main entry for internationalization test module.
119 */
120
121 int /* O - Exit code */
122 main(int argc, /* I - Argument Count */
123 char *argv[]) /* I - Arguments */
124 {
125 FILE *fp; /* File pointer */
126 int count; /* File line counter */
127 int status, /* Status of current test */
128 errors; /* Error count */
129 char line[1024]; /* File line source string */
130 int len; /* Length (count) of string */
131 char legsrc[1024], /* Legacy source string */
132 legdest[1024], /* Legacy destination string */
133 *legptr; /* Pointer into legacy string */
134 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */
135 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
136 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
137 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */
138 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
139 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
140 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */
141 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
142 /* "A != <ALPHA>." - use ISO 8859-7 */
143 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */
144 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
145 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
146 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */
147 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
148 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
149 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */
150 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */
151
152
153 if (argc > 1)
154 {
155 int i; /* Looping var */
156 cups_encoding_t encoding; /* Source encoding */
157
158
159 if (argc != 3)
160 {
161 puts("Usage: ./testi18n [filename charset]");
162 return (1);
163 }
164
165 if ((fp = fopen(argv[1], "rb")) == NULL)
166 {
167 perror(argv[1]);
168 return (1);
169 }
170
171 for (i = 0, encoding = CUPS_AUTO_ENCODING;
172 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
173 i ++)
174 if (!strcasecmp(lang_encodings[i], argv[2]))
175 {
176 encoding = (cups_encoding_t)i;
177 break;
178 }
179
180 if (encoding == CUPS_AUTO_ENCODING)
181 {
182 fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
183 return (1);
184 }
185
186 while (fgets(line, sizeof(line), fp))
187 {
188 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
189 {
190 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
191 return (1);
192 }
193
194 fputs((char *)utf8dest, stdout);
195 }
196
197 fclose(fp);
198 return (0);
199 }
200
201 /*
202 * Make sure we have a symbolic link from the data directory to a
203 * "charmaps" directory, and then point the library at it...
204 */
205
206 if (access("charmaps", 0))
207 symlink("../data", "charmaps");
208
209 putenv("CUPS_DATADIR=.");
210
211 /*
212 * Start with some conversion tests from a UTF-8 test file.
213 */
214
215 errors = 0;
216
217 if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
218 {
219 perror("utf8demo.txt");
220 return (1);
221 }
222
223 /*
224 * cupsUTF8ToUTF32
225 */
226
227 fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
228
229 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
230 {
231 count ++;
232
233 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
234 {
235 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
236 errors ++;
237 status = 1;
238 break;
239 }
240 }
241
242 if (!status)
243 puts("PASS");
244
245 /*
246 * cupsUTF8ToCharset(CUPS_EUC_JP)
247 */
248
249 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
250
251 rewind(fp);
252
253 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
254 {
255 count ++;
256
257 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
258 if (len < 0)
259 {
260 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
261 errors ++;
262 status = 1;
263 break;
264 }
265 }
266
267 if (!status)
268 puts("PASS");
269
270 fclose(fp);
271
272 /*
273 * Test charmap load for ISO-8859-1...
274 */
275
276 fputs("_cupsCharmapGet(CUPS_ISO8859_1): ", stdout);
277
278 if (!_cupsCharmapGet(CUPS_ISO8859_1))
279 {
280 errors ++;
281 puts("FAIL");
282 }
283 else
284 puts("PASS");
285
286 /*
287 * Test charmap load for Windows-932 (Shift-JIS)...
288 */
289
290 fputs("_cupsCharmapGet(CUPS_WINDOWS_932): ", stdout);
291
292 if (!_cupsCharmapGet(CUPS_WINDOWS_932))
293 {
294 errors ++;
295 puts("FAIL");
296 }
297 else
298 puts("PASS");
299
300 /*
301 * Test VBCS charmap load for EUC-JP...
302 */
303
304 fputs("_cupsCharmapGet(CUPS_EUC_JP): ", stdout);
305
306 if (!_cupsCharmapGet(CUPS_EUC_JP))
307 {
308 errors ++;
309 puts("FAIL");
310 }
311 else
312 puts("PASS");
313
314 /*
315 * Test VBCS charmap load for EUC-TW...
316 */
317
318 fputs("_cupsCharmapGet(CUPS_EUC_TW): ", stdout);
319
320 if (!_cupsCharmapGet(CUPS_EUC_TW))
321 {
322 errors ++;
323 puts("FAIL");
324 }
325 else
326 puts("PASS");
327
328 /*
329 * Test UTF-8 to legacy charset (ISO 8859-1)...
330 */
331
332 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
333
334 legdest[0] = 0;
335
336 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
337 if (len < 0)
338 {
339 printf("FAIL (len=%d)\n", len);
340 errors ++;
341 }
342 else
343 puts("PASS");
344
345 /*
346 * cupsCharsetToUTF8
347 */
348
349 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
350
351 strcpy(legsrc, legdest);
352
353 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
354 if (len != strlen((char *)utf8latin))
355 {
356 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
357 print_utf8(" utf8latin", utf8latin);
358 print_utf8(" utf8dest", utf8dest);
359 errors ++;
360 }
361 else if (memcmp(utf8latin, utf8dest, len))
362 {
363 puts("FAIL (results do not match)");
364 print_utf8(" utf8latin", utf8latin);
365 print_utf8(" utf8dest", utf8dest);
366 errors ++;
367 }
368 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
369 {
370 puts("FAIL (replacement characters do not work!)");
371 errors ++;
372 }
373 else
374 puts("PASS");
375
376 /*
377 * Test UTF-8 to/from legacy charset (ISO 8859-7)...
378 */
379
380 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
381
382 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
383 {
384 puts("FAIL");
385 errors ++;
386 }
387 else
388 {
389 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
390
391 if (*legptr)
392 {
393 puts("FAIL (unknown character)");
394 errors ++;
395 }
396 else
397 puts("PASS");
398 }
399
400 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
401
402 strcpy(legsrc, legdest);
403
404 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
405 if (len != strlen((char *)utf8greek))
406 {
407 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
408 print_utf8(" utf8greek", utf8greek);
409 print_utf8(" utf8dest", utf8dest);
410 errors ++;
411 }
412 else if (memcmp(utf8greek, utf8dest, len))
413 {
414 puts("FAIL (results do not match)");
415 print_utf8(" utf8greek", utf8greek);
416 print_utf8(" utf8dest", utf8dest);
417 errors ++;
418 }
419 else
420 puts("PASS");
421
422 /*
423 * Test UTF-8 to/from legacy charset (Windows 932)...
424 */
425
426 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
427
428 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
429 {
430 puts("FAIL");
431 errors ++;
432 }
433 else
434 {
435 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
436
437 if (*legptr)
438 {
439 puts("FAIL (unknown character)");
440 errors ++;
441 }
442 else
443 puts("PASS");
444 }
445
446 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
447
448 strcpy(legsrc, legdest);
449
450 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
451 if (len != strlen((char *)utf8japan))
452 {
453 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
454 print_utf8(" utf8japan", utf8japan);
455 print_utf8(" utf8dest", utf8dest);
456 errors ++;
457 }
458 else if (memcmp(utf8japan, utf8dest, len))
459 {
460 puts("FAIL (results do not match)");
461 print_utf8(" utf8japan", utf8japan);
462 print_utf8(" utf8dest", utf8dest);
463 errors ++;
464 }
465 else
466 puts("PASS");
467
468 /*
469 * Test UTF-8 to/from legacy charset (EUC-JP)...
470 */
471
472 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
473
474 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
475 {
476 puts("FAIL");
477 errors ++;
478 }
479 else
480 {
481 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
482
483 if (*legptr)
484 {
485 puts("FAIL (unknown character)");
486 errors ++;
487 }
488 else
489 puts("PASS");
490 }
491
492 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
493
494 strcpy(legsrc, legdest);
495
496 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
497 if (len != strlen((char *)utf8japan))
498 {
499 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
500 print_utf8(" utf8japan", utf8japan);
501 print_utf8(" utf8dest", utf8dest);
502 errors ++;
503 }
504 else if (memcmp(utf8japan, utf8dest, len))
505 {
506 puts("FAIL (results do not match)");
507 print_utf8(" utf8japan", utf8japan);
508 print_utf8(" utf8dest", utf8dest);
509 errors ++;
510 }
511 else
512 puts("PASS");
513
514 /*
515 * Test UTF-8 to/from legacy charset (Windows 950)...
516 */
517
518 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
519
520 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
521 {
522 puts("FAIL");
523 errors ++;
524 }
525 else
526 {
527 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
528
529 if (*legptr)
530 {
531 puts("FAIL (unknown character)");
532 errors ++;
533 }
534 else
535 puts("PASS");
536 }
537
538 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
539
540 strcpy(legsrc, legdest);
541
542 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
543 if (len != strlen((char *)utf8taiwan))
544 {
545 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
546 print_utf8(" utf8taiwan", utf8taiwan);
547 print_utf8(" utf8dest", utf8dest);
548 errors ++;
549 }
550 else if (memcmp(utf8taiwan, utf8dest, len))
551 {
552 puts("FAIL (results do not match)");
553 print_utf8(" utf8taiwan", utf8taiwan);
554 print_utf8(" utf8dest", utf8dest);
555 errors ++;
556 }
557 else
558 puts("PASS");
559
560 /*
561 * Test UTF-8 to/from legacy charset (EUC-TW)...
562 */
563
564 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
565
566 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
567 {
568 puts("FAIL");
569 errors ++;
570 }
571 else
572 {
573 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
574
575 if (*legptr)
576 {
577 puts("FAIL (unknown character)");
578 errors ++;
579 }
580 else
581 puts("PASS");
582 }
583
584 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
585
586 strcpy(legsrc, legdest);
587
588 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
589 if (len != strlen((char *)utf8taiwan))
590 {
591 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
592 print_utf8(" utf8taiwan", utf8taiwan);
593 print_utf8(" utf8dest", utf8dest);
594 errors ++;
595 }
596 else if (memcmp(utf8taiwan, utf8dest, len))
597 {
598 puts("FAIL (results do not match)");
599 print_utf8(" utf8taiwan", utf8taiwan);
600 print_utf8(" utf8dest", utf8dest);
601 errors ++;
602 }
603 else
604 puts("PASS");
605
606 #if 0
607 /*
608 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
609 */
610 if (verbose)
611 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
612 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
613 if (len < 0)
614 return (1);
615 if (verbose)
616 {
617 print_utf8(" utf8good ", utf8good);
618 print_utf32(" utf32dest", utf32dest);
619 }
620 memcpy (utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
621 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
622 if (len < 0)
623 return (1);
624 if (len != strlen ((char *) utf8good))
625 return (1);
626 if (memcmp(utf8good, utf8dest, len) != 0)
627 return (1);
628
629 /*
630 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
631 */
632 if (verbose)
633 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
634 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
635 if (len >= 0)
636 return (1);
637 if (verbose)
638 print_utf8(" utf8bad ", utf8bad);
639
640 /*
641 * Test _cupsCharmapFlush()...
642 */
643 if (verbose)
644 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
645 _cupsCharmapFlush();
646 return (0);
647 #endif /* 0 */
648
649 return (errors > 0);
650 }
651
652
653 /*
654 * 'print_utf8()' - Print UTF-8 string with (optional) message.
655 */
656
657 static void
658 print_utf8(const char *msg, /* I - Message String */
659 const cups_utf8_t *src) /* I - UTF-8 Source String */
660 {
661 const char *prefix; /* Prefix string */
662
663
664 if (msg)
665 printf("%s:", msg);
666
667 for (prefix = " "; *src; src ++)
668 {
669 printf("%s%02x", prefix, *src);
670
671 if ((src[0] & 0x80) && (src[1] & 0x80))
672 prefix = "";
673 else
674 prefix = " ";
675 }
676
677 putchar('\n');
678 }
679
680
681 /*
682 * End of "$Id: testi18n.c 7560 2008-05-13 06:34:04Z mike $"
683 */