]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
Merge changes from CUPS 1.5svn-r9313.
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
3 *
4 * Transcoding support for CUPS.
5 *
6 * Copyright 2007-2010 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this
13 * file is missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * This file is subject to the Apple OS-Developed Software exception.
16 *
17 * Contents:
18 *
19 * _cupsCharmapFlush() - Flush all character set maps out of cache.
20 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
21 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
22 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
23 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
24 */
25
26 /*
27 * Include necessary headers...
28 */
29
30 #include "cups-private.h"
31 #include <limits.h>
32 #include <time.h>
33 #ifdef HAVE_ICONV_H
34 # include <iconv.h>
35 #endif /* HAVE_ICONV_H */
36
37
38 /*
39 * Local globals...
40 */
41
#ifdef HAVE_ICONV_H
/*
 * A single pair of iconv descriptors is cached for the most recently
 * requested legacy encoding.  The conversion functions in this file lock
 * 'map_mutex' before touching any of the three cache variables.
 */

static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;
                                        /* Mutex to control access to maps */
static iconv_t          map_from_utf8 = (iconv_t)-1;
                                        /* Convert from UTF-8 to charset */
static iconv_t          map_to_utf8 = (iconv_t)-1;
                                        /* Convert from charset to UTF-8 */
static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
                                        /* Which charset is cached */
#endif /* HAVE_ICONV_H */
52
53
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes whichever of the two cached iconv descriptors are open, marks
 * them closed, and resets the cached encoding to CUPS_AUTO_ENCODING.
 * Compiles to an empty function when HAVE_ICONV_H is not defined.
 *
 * This function does not lock 'map_mutex' itself; the callers visible in
 * this file invoke it while already holding the mutex.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  iconv_t       *cached[2];             /* Descriptors to close, in order */
  int           n;                      /* Looping var */


  cached[0] = &map_from_utf8;
  cached[1] = &map_to_utf8;

  for (n = 0; n < 2; n ++)
  {
    if (*cached[n] == (iconv_t)-1)
      continue;                         /* Never opened (or open failed) */

    iconv_close(*cached[n]);
    *cached[n] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
77
78
79 /*
80 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
81 */
82
83 int /* O - Count or -1 on error */
84 cupsCharsetToUTF8(
85 cups_utf8_t *dest, /* O - Target string */
86 const char *src, /* I - Source string */
87 const int maxout, /* I - Max output */
88 const cups_encoding_t encoding) /* I - Encoding */
89 {
90 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
91 #ifdef HAVE_ICONV_H
92 size_t srclen, /* Length of source string */
93 outBytesLeft; /* Bytes remaining in output buffer */
94 #endif /* HAVE_ICONV_H */
95
96
97 /*
98 * Check for valid arguments...
99 */
100
101 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
102 dest, src, maxout, encoding));
103
104 if (!dest || !src || maxout < 1)
105 {
106 if (dest)
107 *dest = '\0';
108
109 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
110 return (-1);
111 }
112
113 /*
114 * Handle identity conversions...
115 */
116
117 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
118 encoding >= CUPS_ENCODING_VBCS_END)
119 {
120 strlcpy((char *)dest, src, maxout);
121 return ((int)strlen((char *)dest));
122 }
123
124 /*
125 * Handle ISO-8859-1 to UTF-8 directly...
126 */
127
128 destptr = dest;
129
130 if (encoding == CUPS_ISO8859_1)
131 {
132 int ch; /* Character from string */
133 cups_utf8_t *destend; /* End of UTF-8 buffer */
134
135
136 destend = dest + maxout - 2;
137
138 while (*src && destptr < destend)
139 {
140 ch = *src++ & 255;
141
142 if (ch & 128)
143 {
144 *destptr++ = 0xc0 | (ch >> 6);
145 *destptr++ = 0x80 | (ch & 0x3f);
146 }
147 else
148 *destptr++ = ch;
149 }
150
151 *destptr = '\0';
152
153 return ((int)(destptr - dest));
154 }
155
156 /*
157 * Convert input legacy charset to UTF-8...
158 */
159
160 #ifdef HAVE_ICONV_H
161 _cupsMutexLock(&map_mutex);
162
163 if (map_encoding != encoding)
164 {
165 _cupsCharmapFlush();
166
167 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
168 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
169 map_encoding = encoding;
170 }
171
172 if (map_to_utf8 != (iconv_t)-1)
173 {
174 srclen = strlen(src);
175 outBytesLeft = maxout - 1;
176
177 iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
178 &outBytesLeft);
179 *destptr = '\0';
180
181 _cupsMutexUnlock(&map_mutex);
182
183 return ((int)(destptr - dest));
184 }
185
186 _cupsMutexUnlock(&map_mutex);
187 #endif /* HAVE_ICONV_H */
188
189 /*
190 * No iconv() support, so error out...
191 */
192
193 *destptr = '\0';
194
195 return (-1);
196 }
197
198
199 /*
200 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
201 */
202
203 int /* O - Count or -1 on error */
204 cupsUTF8ToCharset(
205 char *dest, /* O - Target string */
206 const cups_utf8_t *src, /* I - Source string */
207 const int maxout, /* I - Max output */
208 const cups_encoding_t encoding) /* I - Encoding */
209 {
210 char *destptr; /* Pointer into destination */
211 #ifdef HAVE_ICONV_H
212 size_t srclen, /* Length of source string */
213 outBytesLeft; /* Bytes remaining in output buffer */
214 #endif /* HAVE_ICONV_H */
215
216
217 /*
218 * Check for valid arguments...
219 */
220
221 if (!dest || !src || maxout < 1)
222 {
223 if (dest)
224 *dest = '\0';
225
226 return (-1);
227 }
228
229 /*
230 * Handle identity conversions...
231 */
232
233 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
234 encoding >= CUPS_ENCODING_VBCS_END)
235 {
236 strlcpy(dest, (char *)src, maxout);
237 return ((int)strlen(dest));
238 }
239
240 /*
241 * Handle UTF-8 to ISO-8859-1 directly...
242 */
243
244 destptr = dest;
245
246 if (encoding == CUPS_ISO8859_1)
247 {
248 int ch; /* Character from string */
249 char *destend; /* End of ISO-8859-1 buffer */
250
251
252 destend = dest + maxout - 1;
253
254 while (*src && destptr < destend)
255 {
256 ch = *src++;
257
258 if ((ch & 0xe0) == 0xc0)
259 {
260 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
261
262 if (ch < 256)
263 *destptr++ = ch;
264 else
265 *destptr++ = '?';
266 }
267 else if ((ch & 0xf0) == 0xe0 ||
268 (ch & 0xf8) == 0xf0)
269 *destptr++ = '?';
270 else if (!(ch & 0x80))
271 *destptr++ = ch;
272 }
273
274 *destptr = '\0';
275
276 return ((int)(destptr - dest));
277 }
278
279 #ifdef HAVE_ICONV_H
280 /*
281 * Convert input UTF-8 to legacy charset...
282 */
283
284 _cupsMutexLock(&map_mutex);
285
286 if (map_encoding != encoding)
287 {
288 _cupsCharmapFlush();
289
290 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
291 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
292 map_encoding = encoding;
293 }
294
295 if (map_from_utf8 != (iconv_t)-1)
296 {
297 srclen = strlen((char *)src);
298 outBytesLeft = maxout - 1;
299
300 iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
301 *destptr = '\0';
302
303 _cupsMutexUnlock(&map_mutex);
304
305 return ((int)(destptr - dest));
306 }
307
308 _cupsMutexUnlock(&map_mutex);
309 #endif /* HAVE_ICONV_H */
310
311 /*
312 * No iconv() support, so error out...
313 */
314
315 *destptr = '\0';
316
317 return (-1);
318 }
319
320
321 /*
322 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
323 *
324 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
325 *
326 * UTF-32 char UTF-8 char(s)
327 * --------------------------------------------------
328 * 0 to 127 = 0xxxxxxx (US-ASCII)
329 * 128 to 2047 = 110xxxxx 10yyyyyy
330 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
331 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
332 *
333 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
334 * which would convert to five- or six-octet UTF-8 sequences...
335 */
336
337 int /* O - Count or -1 on error */
338 cupsUTF8ToUTF32(
339 cups_utf32_t *dest, /* O - Target string */
340 const cups_utf8_t *src, /* I - Source string */
341 const int maxout) /* I - Max output */
342 {
343 int i; /* Looping variable */
344 cups_utf8_t ch; /* Character value */
345 cups_utf8_t next; /* Next character value */
346 cups_utf32_t ch32; /* UTF-32 character value */
347
348
349 /*
350 * Check for valid arguments and clear output...
351 */
352
353 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
354 src, maxout));
355
356 if (dest)
357 *dest = 0;
358
359 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
360 {
361 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
362
363 return (-1);
364 }
365
366 /*
367 * Convert input UTF-8 to output UTF-32...
368 */
369
370 for (i = maxout - 1; *src && i > 0; i --)
371 {
372 ch = *src++;
373
374 /*
375 * Convert UTF-8 character(s) to UTF-32 character...
376 */
377
378 if (!(ch & 0x80))
379 {
380 /*
381 * One-octet UTF-8 <= 127 (US-ASCII)...
382 */
383
384 *dest++ = ch;
385
386 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
387 continue;
388 }
389 else if ((ch & 0xe0) == 0xc0)
390 {
391 /*
392 * Two-octet UTF-8 <= 2047 (Latin-x)...
393 */
394
395 next = *src++;
396 if ((next & 0xc0) != 0x80)
397 {
398 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
399
400 return (-1);
401 }
402
403 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
404
405 /*
406 * Check for non-shortest form (invalid UTF-8)...
407 */
408
409 if (ch32 < 0x80)
410 {
411 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
412
413 return (-1);
414 }
415
416 *dest++ = ch32;
417
418 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
419 src[-2], src[-1], (unsigned)ch32));
420 }
421 else if ((ch & 0xf0) == 0xe0)
422 {
423 /*
424 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
425 */
426
427 next = *src++;
428 if ((next & 0xc0) != 0x80)
429 {
430 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
431
432 return (-1);
433 }
434
435 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
436
437 next = *src++;
438 if ((next & 0xc0) != 0x80)
439 {
440 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
441
442 return (-1);
443 }
444
445 ch32 = (ch32 << 6) | (next & 0x3f);
446
447 /*
448 * Check for non-shortest form (invalid UTF-8)...
449 */
450
451 if (ch32 < 0x800)
452 {
453 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
454
455 return (-1);
456 }
457
458 *dest++ = ch32;
459
460 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
461 src[-3], src[-2], src[-1], (unsigned)ch32));
462 }
463 else if ((ch & 0xf8) == 0xf0)
464 {
465 /*
466 * Four-octet UTF-8...
467 */
468
469 next = *src++;
470 if ((next & 0xc0) != 0x80)
471 {
472 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
473
474 return (-1);
475 }
476
477 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
478
479 next = *src++;
480 if ((next & 0xc0) != 0x80)
481 {
482 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
483
484 return (-1);
485 }
486
487 ch32 = (ch32 << 6) | (next & 0x3f);
488
489 next = *src++;
490 if ((next & 0xc0) != 0x80)
491 {
492 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
493
494 return (-1);
495 }
496
497 ch32 = (ch32 << 6) | (next & 0x3f);
498
499 /*
500 * Check for non-shortest form (invalid UTF-8)...
501 */
502
503 if (ch32 < 0x10000)
504 {
505 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
506
507 return (-1);
508 }
509
510 *dest++ = ch32;
511
512 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
513 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
514 }
515 else
516 {
517 /*
518 * More than 4-octet (invalid UTF-8 sequence)...
519 */
520
521 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
522
523 return (-1);
524 }
525
526 /*
527 * Check for UTF-16 surrogate (illegal UTF-8)...
528 */
529
530 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
531 return (-1);
532 }
533
534 *dest = 0;
535
536 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
537
538 return (maxout - 1 - i);
539 }
540
541
542 /*
543 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
544 *
545 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
546 *
547 * UTF-32 char UTF-8 char(s)
548 * --------------------------------------------------
549 * 0 to 127 = 0xxxxxxx (US-ASCII)
550 * 128 to 2047 = 110xxxxx 10yyyyyy
551 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
552 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
553 *
554 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
555 * which would convert to five- or six-octet UTF-8 sequences...
556 */
557
558 int /* O - Count or -1 on error */
559 cupsUTF32ToUTF8(
560 cups_utf8_t *dest, /* O - Target string */
561 const cups_utf32_t *src, /* I - Source string */
562 const int maxout) /* I - Max output */
563 {
564 cups_utf8_t *start; /* Start of destination string */
565 int i; /* Looping variable */
566 int swap; /* Byte-swap input to output */
567 cups_utf32_t ch; /* Character value */
568
569
570 /*
571 * Check for valid arguments and clear output...
572 */
573
574 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
575 maxout));
576
577 if (dest)
578 *dest = '\0';
579
580 if (!dest || !src || maxout < 1)
581 {
582 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
583
584 return (-1);
585 }
586
587 /*
588 * Check for leading BOM in UTF-32 and inverted BOM...
589 */
590
591 start = dest;
592 swap = *src == 0xfffe0000;
593
594 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
595
596 if (*src == 0xfffe0000 || *src == 0xfeff)
597 src ++;
598
599 /*
600 * Convert input UTF-32 to output UTF-8...
601 */
602
603 for (i = maxout - 1; *src && i > 0;)
604 {
605 ch = *src++;
606
607 /*
608 * Byte swap input UTF-32, if necessary...
609 * (only byte-swapping 24 of 32 bits)
610 */
611
612 if (swap)
613 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
614
615 /*
616 * Check for beyond Plane 16 (invalid UTF-32)...
617 */
618
619 if (ch > 0x10ffff)
620 {
621 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
622
623 return (-1);
624 }
625
626 /*
627 * Convert UTF-32 character to UTF-8 character(s)...
628 */
629
630 if (ch < 0x80)
631 {
632 /*
633 * One-octet UTF-8 <= 127 (US-ASCII)...
634 */
635
636 *dest++ = (cups_utf8_t)ch;
637 i --;
638
639 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
640 }
641 else if (ch < 0x800)
642 {
643 /*
644 * Two-octet UTF-8 <= 2047 (Latin-x)...
645 */
646
647 if (i < 2)
648 {
649 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
650
651 return (-1);
652 }
653
654 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
655 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
656 i -= 2;
657
658 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
659 dest[-2], dest[-1]));
660 }
661 else if (ch < 0x10000)
662 {
663 /*
664 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
665 */
666
667 if (i < 3)
668 {
669 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
670
671 return (-1);
672 }
673
674 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
675 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
676 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
677 i -= 3;
678
679 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
680 dest[-3], dest[-2], dest[-1]));
681 }
682 else
683 {
684 /*
685 * Four-octet UTF-8...
686 */
687
688 if (i < 4)
689 {
690 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
691
692 return (-1);
693 }
694
695 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
696 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
697 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
698 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
699 i -= 4;
700
701 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
702 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
703 }
704 }
705
706 *dest = '\0';
707
708 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
709
710 return ((int)(dest - start));
711 }
712
713
714 /*
715 * 'compare_wide()' - Compare key for wide (VBCS) match.
716 */
717
718 static int
719 compare_wide(const void *k1, /* I - Key char */
720 const void *k2) /* I - Map char */
721 {
722 cups_vbcs_t key; /* Legacy key character */
723 cups_vbcs_t map; /* Legacy map character */
724
725
726 key = *((cups_vbcs_t *)k1);
727 map = ((_cups_wide2uni_t *)k2)->widechar;
728
729 return ((int)(key - map));
730 }
731
732
733 /*
734 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
735 */
736
737 static int /* O - Count or -1 on error */
738 conv_sbcs_to_utf8(
739 cups_utf8_t *dest, /* O - Target string */
740 const cups_sbcs_t *src, /* I - Source string */
741 int maxout, /* I - Max output */
742 const cups_encoding_t encoding) /* I - Encoding */
743 {
744 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
745 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
746 cups_sbcs_t legchar; /* Legacy character value */
747 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
748 *workptr; /* Pointer into string */
749
750
751 /*
752 * Find legacy charset map in cache...
753 */
754
755 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
756 return (-1);
757
758 /*
759 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
760 */
761
762 work[0] = 0xfeff;
763 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
764 {
765 legchar = *src++;
766
767 /*
768 * Convert ASCII verbatim (optimization)...
769 */
770
771 if (legchar < 0x80)
772 *workptr++ = (cups_utf32_t)legchar;
773 else
774 {
775 /*
776 * Convert unknown character to Replacement Character...
777 */
778
779 crow = cmap->char2uni + legchar;
780
781 if (!*crow)
782 *workptr++ = 0xfffd;
783 else
784 *workptr++ = (cups_utf32_t)*crow;
785 }
786 }
787
788 *workptr = 0;
789
790 /*
791 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
792 */
793
794 cmap->used --;
795
796 return (cupsUTF32ToUTF8(dest, work, maxout));
797 }
798
799
800 /*
801 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
802 */
803
804 static int /* O - Count or -1 on error */
805 conv_utf8_to_sbcs(
806 cups_sbcs_t *dest, /* O - Target string */
807 const cups_utf8_t *src, /* I - Source string */
808 int maxout, /* I - Max output */
809 const cups_encoding_t encoding) /* I - Encoding */
810 {
811 cups_sbcs_t *start; /* Start of destination string */
812 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
813 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
814 cups_utf32_t unichar; /* Character value */
815 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
816 *workptr; /* Pointer into string */
817
818
819 /*
820 * Find legacy charset map in cache...
821 */
822
823 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
824 return (-1);
825
826 /*
827 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
828 */
829
830 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
831 return (-1);
832
833 /*
834 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
835 */
836
837 for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
838 {
839 unichar = *workptr++;
840 if (!unichar)
841 break;
842
843 /*
844 * Convert ASCII verbatim (optimization)...
845 */
846
847 if (unichar < 0x80)
848 {
849 *dest++ = (cups_sbcs_t)unichar;
850 continue;
851 }
852
853 /*
854 * Convert unknown character to visible replacement...
855 */
856
857 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
858
859 if (srow)
860 srow += (int)(unichar & 0xff);
861
862 if (!srow || !*srow)
863 *dest++ = '?';
864 else
865 *dest++ = *srow;
866 }
867
868 *dest = '\0';
869
870 cmap->used --;
871
872 return ((int)(dest - start));
873 }
874
875
876 /*
877 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
878 */
879
880 static int /* O - Count or -1 on error */
881 conv_utf8_to_vbcs(
882 cups_sbcs_t *dest, /* O - Target string */
883 const cups_utf8_t *src, /* I - Source string */
884 int maxout, /* I - Max output */
885 const cups_encoding_t encoding) /* I - Encoding */
886 {
887 cups_sbcs_t *start; /* Start of destination string */
888 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
889 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
890 cups_utf32_t unichar; /* Character value */
891 cups_vbcs_t legchar; /* Legacy character value */
892 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
893 *workptr; /* Pointer into string */
894
895
896 DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
897 "encoding=%d)", dest, src, maxout, encoding));
898
899 /*
900 * Find legacy charset map in cache...
901 */
902
903 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
904 {
905 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
906
907 return (-1);
908 }
909
910 /*
911 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
912 */
913
914 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
915 {
916 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
917
918 return (-1);
919 }
920
921 /*
922 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
923 */
924
925 for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
926 {
927 unichar = *workptr++;
928
929 /*
930 * Convert ASCII verbatim (optimization)...
931 */
932
933 if (unichar < 0x80)
934 {
935 *dest++ = (cups_sbcs_t)unichar;
936
937 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
938 dest[-1]));
939
940 continue;
941 }
942
943 /*
944 * Convert unknown character to visible replacement...
945 */
946
947 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
948
949 if (vrow)
950 vrow += (int)(unichar & 0xff);
951
952 if (!vrow || !*vrow)
953 legchar = (cups_vbcs_t)'?';
954 else
955 legchar = (cups_vbcs_t)*vrow;
956
957 /*
958 * Save n-byte legacy character...
959 */
960
961 if (legchar > 0xffffff)
962 {
963 if (maxout < 5)
964 {
965 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
966
967 return (-1);
968 }
969
970 *dest++ = (cups_sbcs_t)(legchar >> 24);
971 *dest++ = (cups_sbcs_t)(legchar >> 16);
972 *dest++ = (cups_sbcs_t)(legchar >> 8);
973 *dest++ = (cups_sbcs_t)legchar;
974
975 maxout -= 3;
976
977 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
978 (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
979 }
980 else if (legchar > 0xffff)
981 {
982 if (maxout < 4)
983 {
984 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
985
986 return (-1);
987 }
988
989 *dest++ = (cups_sbcs_t)(legchar >> 16);
990 *dest++ = (cups_sbcs_t)(legchar >> 8);
991 *dest++ = (cups_sbcs_t)legchar;
992
993 maxout -= 2;
994
995 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
996 (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
997 }
998 else if (legchar > 0xff)
999 {
1000 *dest++ = (cups_sbcs_t)(legchar >> 8);
1001 *dest++ = (cups_sbcs_t)legchar;
1002
1003 maxout --;
1004
1005 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1006 (unsigned)unichar, dest[-2], dest[-1]));
1007 }
1008 else
1009 {
1010 *dest++ = (cups_sbcs_t)legchar;
1011
1012 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1013 (unsigned)unichar, dest[-1]));
1014 }
1015 }
1016
1017 *dest = '\0';
1018
1019 vmap->used --;
1020
1021 DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1022 (int)(dest - start)));
1023
1024 return ((int)(dest - start));
1025 }
1026
1027
1028 /*
1029 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1030 */
1031
1032 static int /* O - Count or -1 on error */
1033 conv_vbcs_to_utf8(
1034 cups_utf8_t *dest, /* O - Target string */
1035 const cups_sbcs_t *src, /* I - Source string */
1036 int maxout, /* I - Max output */
1037 const cups_encoding_t encoding) /* I - Encoding */
1038 {
1039 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1040 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1041 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1042 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1043 cups_vbcs_t legchar; /* Legacy character value */
1044 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1045 *workptr; /* Pointer into string */
1046
1047
1048 /*
1049 * Find legacy charset map in cache...
1050 */
1051
1052 DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1053 dest, src, maxout, encoding));
1054
1055 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1056 {
1057 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1058
1059 return (-1);
1060 }
1061
1062 /*
1063 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1064 */
1065
1066 work[0] = 0xfeff;
1067 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1068 {
1069 legchar = *src++;
1070 leadchar = (cups_sbcs_t)legchar;
1071
1072 /*
1073 * Convert ASCII verbatim (optimization)...
1074 */
1075
1076 if (legchar < 0x80)
1077 {
1078 *workptr++ = (cups_utf32_t)legchar;
1079
1080 DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1081 (unsigned)legchar));
1082 continue;
1083 }
1084
1085 /*
1086 * Convert 2-byte legacy character...
1087 */
1088
1089 if (vmap->lead2char[(int)leadchar] == leadchar)
1090 {
1091 if (!*src)
1092 {
1093 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1094
1095 return (-1);
1096 }
1097
1098 legchar = (legchar << 8) | *src++;
1099
1100 /*
1101 * Convert unknown character to Replacement Character...
1102 */
1103
1104 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1105 if (crow)
1106 crow += (int) (legchar & 0xff);
1107
1108 if (!crow || !*crow)
1109 *workptr++ = 0xfffd;
1110 else
1111 *workptr++ = (cups_utf32_t)*crow;
1112
1113 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1114 src[-2], src[-1], (unsigned)workptr[-1]));
1115 continue;
1116 }
1117
1118 /*
1119 * Fetch 3-byte or 4-byte legacy character...
1120 */
1121
1122 if (vmap->lead3char[(int)leadchar] == leadchar)
1123 {
1124 if (!*src || !src[1])
1125 {
1126 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1127
1128 return (-1);
1129 }
1130
1131 legchar = (legchar << 8) | *src++;
1132 legchar = (legchar << 8) | *src++;
1133 }
1134 else if (vmap->lead4char[(int)leadchar] == leadchar)
1135 {
1136 if (!*src || !src[1] || !src[2])
1137 {
1138 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1139
1140 return (-1);
1141 }
1142
1143 legchar = (legchar << 8) | *src++;
1144 legchar = (legchar << 8) | *src++;
1145 legchar = (legchar << 8) | *src++;
1146 }
1147 else
1148 {
1149 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1150
1151 return (-1);
1152 }
1153
1154 /*
1155 * Find 3-byte or 4-byte legacy character...
1156 */
1157
1158 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1159 vmap->wide2uni,
1160 vmap->widecount,
1161 sizeof(_cups_wide2uni_t),
1162 compare_wide);
1163
1164 /*
1165 * Convert unknown character to Replacement Character...
1166 */
1167
1168 if (!wide2uni || !wide2uni->unichar)
1169 *workptr++ = 0xfffd;
1170 else
1171 *workptr++ = wide2uni->unichar;
1172
1173 if (vmap->lead3char[(int)leadchar] == leadchar)
1174 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1175 src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1176 else
1177 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1178 src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1179 }
1180
1181 *workptr = 0;
1182
1183 vmap->used --;
1184
1185 DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1186 (int)(workptr - work)));
1187
1188 /*
1189 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1190 */
1191
1192 return (cupsUTF32ToUTF8(dest, work, maxout));
1193 }
1194
1195
1196 /*
1197 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1198 */
1199
1200 static void
1201 free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
1202 {
1203 int i; /* Looping variable */
1204
1205
1206 for (i = 0; i < 256; i ++)
1207 if (cmap->uni2char[i])
1208 free(cmap->uni2char[i]);
1209
1210 free(cmap);
1211 }
1212
1213
1214 /*
1215 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1216 */
1217
1218 static void
1219 free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1220 {
1221 int i; /* Looping variable */
1222
1223
1224 for (i = 0; i < 256; i ++)
1225 if (vmap->char2uni[i])
1226 free(vmap->char2uni[i]);
1227
1228 for (i = 0; i < 256; i ++)
1229 if (vmap->uni2char[i])
1230 free(vmap->uni2char[i]);
1231
1232 if (vmap->wide2uni)
1233 free(vmap->wide2uni);
1234
1235 free(vmap);
1236 }
1237
1238
1239 /*
1240 * 'get_charmap()' - Lookup or get a character set map (private).
1241 *
1242 * This code handles single-byte (SBCS), double-byte (DBCS), and
1243 * variable-byte (VBCS) character sets _without_ charset escapes...
1244 * This code does not handle multiple-byte character sets (MBCS)
1245 * (such as ISO-2022-JP) with charset switching via escapes...
1246 */
1247
1248
1249 static void * /* O - Charset map pointer */
1250 get_charmap(
1251 const cups_encoding_t encoding) /* I - Encoding */
1252 {
1253 char filename[1024]; /* Filename for charset map file */
1254 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1255
1256
1257 DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1258
1259 /*
1260 * Get the data directory and charset map name...
1261 */
1262
1263 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1264 cg->cups_datadir, _cupsEncodingName(encoding));
1265
1266 DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1267
1268 /*
1269 * Read charset map input file into cache...
1270 */
1271
1272 if (encoding < CUPS_ENCODING_SBCS_END)
1273 return (get_sbcs_charmap(encoding, filename));
1274 else if (encoding < CUPS_ENCODING_VBCS_END)
1275 return (get_vbcs_charmap(encoding, filename));
1276 else
1277 return (NULL);
1278 }
1279
1280
1281 /*
1282 * 'get_charmap_count()' - Count lines in a charmap file.
1283 */
1284
1285 static int /* O - Count or -1 on error */
1286 get_charmap_count(cups_file_t *fp) /* I - File to read from */
1287 {
1288 int count; /* Number of lines */
1289 char line[256]; /* Line from input map file */
1290
1291
1292 /*
1293 * Count lines in map input file...
1294 */
1295
1296 count = 0;
1297
1298 while (cupsFileGets(fp, line, sizeof(line)))
1299 if (line[0] == '0')
1300 count ++;
1301
1302 /*
1303 * Return the number of lines...
1304 */
1305
1306 if (count > 0)
1307 return (count);
1308 else
1309 return (-1);
1310 }
1311
1312
1313 /*
1314 * 'get_sbcs_charmap()' - Get SBCS Charmap.
1315 */
1316
1317 static _cups_cmap_t * /* O - Charmap or 0 on error */
1318 get_sbcs_charmap(
1319 const cups_encoding_t encoding, /* I - Charmap Encoding */
1320 const char *filename) /* I - Charmap Filename */
1321 {
1322 unsigned long legchar; /* Legacy character value */
1323 cups_utf32_t unichar; /* Unicode character value */
1324 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1325 cups_file_t *fp; /* Charset map file pointer */
1326 char *s; /* Line parsing pointer */
1327 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1328 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1329 char line[256]; /* Line from charset map file */
1330
1331
1332 /*
1333 * See if we already have this SBCS charset map loaded...
1334 */
1335
1336 DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1337 filename));
1338
1339 for (cmap = cmap_cache; cmap; cmap = cmap->next)
1340 {
1341 if (cmap->encoding == encoding)
1342 {
1343 cmap->used ++;
1344 DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1345
1346 return ((void *)cmap);
1347 }
1348 }
1349
1350 /*
1351 * Open SBCS charset map input file...
1352 */
1353
1354 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1355 {
1356 DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1357
1358 return (NULL);
1359 }
1360
1361 /*
1362 * Allocate memory for SBCS charset map...
1363 */
1364
1365 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1366 {
1367 cupsFileClose(fp);
1368 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1369
1370 return (NULL);
1371 }
1372
1373 cmap->used ++;
1374 cmap->encoding = encoding;
1375
1376 /*
1377 * Save SBCS charset map into memory for transcoding...
1378 */
1379
1380 while (cupsFileGets(fp, line, sizeof(line)))
1381 {
1382 if (line[0] != '0')
1383 continue;
1384
1385 legchar = strtol(line, &s, 16);
1386 if (legchar < 0 || legchar > 0xff)
1387 goto sbcs_error;
1388
1389 unichar = strtol(s, NULL, 16);
1390 if (unichar < 0 || unichar > 0x10ffff)
1391 goto sbcs_error;
1392
1393 /*
1394 * Save legacy to Unicode mapping in direct lookup table...
1395 */
1396
1397 crow = cmap->char2uni + legchar;
1398 *crow = (cups_ucs2_t)(unichar & 0xffff);
1399
1400 /*
1401 * Save Unicode to legacy mapping in indirect lookup table...
1402 */
1403
1404 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1405 if (!srow)
1406 {
1407 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1408 if (!srow)
1409 goto sbcs_error;
1410
1411 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1412 }
1413
1414 srow += unichar & 0xff;
1415
1416 /*
1417 * Convert Replacement Character to visible replacement...
1418 */
1419
1420 if (unichar == 0xfffd)
1421 legchar = (unsigned long)'?';
1422
1423 /*
1424 * First (oldest) legacy character uses Unicode mapping cell...
1425 */
1426
1427 if (!*srow)
1428 *srow = (cups_sbcs_t)legchar;
1429 }
1430
1431 cupsFileClose(fp);
1432
1433 /*
1434 * Add it to the cache and return...
1435 */
1436
1437 cmap->next = cmap_cache;
1438 cmap_cache = cmap;
1439
1440 DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1441
1442 return (cmap);
1443
1444 /*
1445 * If we get here, there was an error in the cmap file...
1446 */
1447
1448 sbcs_error:
1449
1450 free_sbcs_charmap(cmap);
1451
1452 cupsFileClose(fp);
1453
1454 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1455
1456 return (NULL);
1457 }
1458
1459
1460 /*
1461 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1462 */
1463
1464 static _cups_vmap_t * /* O - Charmap or 0 on error */
1465 get_vbcs_charmap(
1466 const cups_encoding_t encoding, /* I - Charmap Encoding */
1467 const char *filename) /* I - Charmap Filename */
1468 {
1469 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1470 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1471 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1472 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1473 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1474 unsigned long legchar; /* Legacy character value */
1475 cups_utf32_t unichar; /* Unicode character value */
1476 int mapcount; /* Count of lines in charmap file */
1477 cups_file_t *fp; /* Charset map file pointer */
1478 char *s; /* Line parsing pointer */
1479 char line[256]; /* Line from charset map file */
1480 int i; /* Loop variable */
1481 int legacy; /* 32-bit legacy char */
1482
1483
1484 DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1485 encoding, filename));
1486
1487 /*
1488 * See if we already have this DBCS/VBCS charset map loaded...
1489 */
1490
1491 for (vmap = vmap_cache; vmap; vmap = vmap->next)
1492 {
1493 if (vmap->encoding == encoding)
1494 {
1495 vmap->used ++;
1496 DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1497
1498 return ((void *)vmap);
1499 }
1500 }
1501
1502 /*
1503 * Open VBCS charset map input file...
1504 */
1505
1506 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1507 {
1508 DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1509
1510 return (NULL);
1511 }
1512
1513 /*
1514 * Count lines in charmap file...
1515 */
1516
1517 if ((mapcount = get_charmap_count(fp)) <= 0)
1518 {
1519 DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1520
1521 cupsFileClose(fp);
1522
1523 return (NULL);
1524 }
1525
1526 DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1527
1528 /*
1529 * Allocate memory for DBCS/VBCS charset map...
1530 */
1531
1532 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1533 {
1534 DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1535
1536 cupsFileClose(fp);
1537
1538 return (NULL);
1539 }
1540
1541 vmap->used ++;
1542 vmap->encoding = encoding;
1543
1544 /*
1545 * Save DBCS/VBCS charset map into memory for transcoding...
1546 */
1547
1548 wide2uni = NULL;
1549
1550 cupsFileRewind(fp);
1551
1552 i = 0;
1553 legacy = 0;
1554
1555 while (cupsFileGets(fp, line, sizeof(line)))
1556 {
1557 if (line[0] != '0')
1558 continue;
1559
1560 legchar = strtoul(line, &s, 16);
1561 if (legchar == ULONG_MAX)
1562 goto vbcs_error;
1563
1564 unichar = strtol(s, NULL, 16);
1565 if (unichar < 0 || unichar > 0x10ffff)
1566 goto vbcs_error;
1567
1568 i ++;
1569
1570 DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1571 legchar, (unsigned)unichar));
1572
1573 /*
1574 * Save lead char of 2/3/4-byte legacy char...
1575 */
1576
1577 if (legchar > 0xffffff)
1578 {
1579 leadchar = (cups_sbcs_t)(legchar >> 24);
1580 vmap->lead4char[leadchar] = leadchar;
1581 }
1582 else if (legchar > 0xffff)
1583 {
1584 leadchar = (cups_sbcs_t)(legchar >> 16);
1585 vmap->lead3char[leadchar] = leadchar;
1586 }
1587 else
1588 {
1589 leadchar = (cups_sbcs_t)(legchar >> 8);
1590 vmap->lead2char[leadchar] = leadchar;
1591 }
1592
1593 /*
1594 * Save Legacy to Unicode mapping...
1595 */
1596
1597 if (legchar <= 0xffff)
1598 {
1599 /*
1600 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1601 */
1602
1603 crow = vmap->char2uni[(int)leadchar];
1604 if (!crow)
1605 {
1606 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1607 if (!crow)
1608 goto vbcs_error;
1609
1610 vmap->char2uni[(int)leadchar] = crow;
1611 }
1612
1613 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1614 }
1615 else
1616 {
1617 /*
1618 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1619 */
1620
1621 if (!legacy)
1622 {
1623 legacy = 1;
1624 vmap->widecount = (mapcount - i + 1);
1625 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1626 sizeof(_cups_wide2uni_t));
1627 if (!wide2uni)
1628 goto vbcs_error;
1629
1630 vmap->wide2uni = wide2uni;
1631 }
1632
1633 wide2uni->widechar = (cups_vbcs_t)legchar;
1634 wide2uni->unichar = (cups_ucs2_t)unichar;
1635 wide2uni ++;
1636 }
1637
1638 /*
1639 * Save Unicode to legacy mapping in indirect lookup table...
1640 */
1641
1642 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1643 if (!vrow)
1644 {
1645 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1646 if (!vrow)
1647 goto vbcs_error;
1648
1649 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1650 }
1651
1652 vrow += (int)(unichar & 0xff);
1653
1654 /*
1655 * Convert Replacement Character to visible replacement...
1656 */
1657
1658 if (unichar == 0xfffd)
1659 legchar = (unsigned long)'?';
1660
1661 /*
1662 * First (oldest) legacy character uses Unicode mapping cell...
1663 */
1664
1665 if (!*vrow)
1666 *vrow = (cups_vbcs_t)legchar;
1667 }
1668
1669 vmap->charcount = (i - vmap->widecount);
1670
1671 cupsFileClose(fp);
1672
1673 /*
1674 * Add it to the cache and return...
1675 */
1676
1677 vmap->next = vmap_cache;
1678 vmap_cache = vmap;
1679
1680 DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1681
1682 return (vmap);
1683
1684 /*
1685 * If we get here, the file contains errors...
1686 */
1687
1688 vbcs_error:
1689
1690 free_vbcs_charmap(vmap);
1691
1692 cupsFileClose(fp);
1693
1694 DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1695
1696 return (NULL);
1697 }
1698
1699
1700 /*
1701 * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1702 */