]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
Merge changes from CUPS 1.5svn-r9136.
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
3 *
4 * Transcoding support for CUPS.
5 *
6 * Copyright 2007-2010 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * This file is subject to the Apple OS-Developed Software exception.
16 *
17 * Contents:
18 *
19 * _cupsCharmapFlush() - Flush all character set maps out of cache.
20 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
21 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
22 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
23 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
24 */
25
26 /*
27 * Include necessary headers...
28 */
29
30 #include "cups-private.h"
31 #include <limits.h>
32 #include <time.h>
33 #ifdef HAVE_ICONV_H
34 # include <iconv.h>
35 #endif /* HAVE_ICONV_H */
36
37
38 /*
39 * Local globals...
40 */
41
#ifdef HAVE_ICONV_H
/*
 * A single pair of iconv descriptors is cached for the most recently used
 * legacy encoding; (iconv_t)-1 marks a descriptor that is not open.
 */

/* Mutex to control access to maps */
static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;

/* Convert from UTF-8 to charset */
static iconv_t          map_from_utf8 = (iconv_t)-1;

/* Convert from charset to UTF-8 */
static iconv_t          map_to_utf8 = (iconv_t)-1;

/* Which charset is cached */
static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
52
53
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes whichever of the two cached iconv descriptors are open, marks them
 * as closed, and resets the cached encoding to CUPS_AUTO_ENCODING.  The
 * function does no locking itself; the converters in this file call it
 * while holding "map_mutex".  Without iconv support it does nothing.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  iconv_t       *cached[2] = { &map_from_utf8, &map_to_utf8 };
                                        /* Cached descriptors, in close order */
  int           n;                      /* Looping var */


  for (n = 0; n < 2; n ++)
  {
    if (*cached[n] == (iconv_t)-1)
      continue;                         /* Not open, nothing to close */

    iconv_close(*cached[n]);
    *cached[n] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
77
78
79 /*
80 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
81 */
82
83 int /* O - Count or -1 on error */
84 cupsCharsetToUTF8(
85 cups_utf8_t *dest, /* O - Target string */
86 const char *src, /* I - Source string */
87 const int maxout, /* I - Max output */
88 const cups_encoding_t encoding) /* I - Encoding */
89 {
90 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
91 int bytes; /* Number of bytes converted */
92 size_t srclen, /* Length of source string */
93 outBytesLeft; /* Bytes remaining in output buffer */
94
95
96 /*
97 * Check for valid arguments...
98 */
99
100 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
101 dest, src, maxout, encoding));
102
103 if (!dest || !src || maxout < 1)
104 {
105 if (dest)
106 *dest = '\0';
107
108 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
109 return (-1);
110 }
111
112 /*
113 * Handle identity conversions...
114 */
115
116 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
117 encoding >= CUPS_ENCODING_VBCS_END)
118 {
119 strlcpy((char *)dest, src, maxout);
120 return ((int)strlen((char *)dest));
121 }
122
123 /*
124 * Handle ISO-8859-1 to UTF-8 directly...
125 */
126
127 destptr = dest;
128
129 if (encoding == CUPS_ISO8859_1)
130 {
131 int ch; /* Character from string */
132 cups_utf8_t *destend; /* End of UTF-8 buffer */
133
134
135 destend = dest + maxout - 2;
136
137 while (*src && destptr < destend)
138 {
139 ch = *src++ & 255;
140
141 if (ch & 128)
142 {
143 *destptr++ = 0xc0 | (ch >> 6);
144 *destptr++ = 0x80 | (ch & 0x3f);
145 }
146 else
147 *destptr++ = ch;
148 }
149
150 *destptr = '\0';
151
152 return ((int)(destptr - dest));
153 }
154
155 /*
156 * Convert input legacy charset to UTF-8...
157 */
158
159 #ifdef HAVE_ICONV_H
160 _cupsMutexLock(&map_mutex);
161
162 if (map_encoding != encoding)
163 {
164 _cupsCharmapFlush();
165
166 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
167 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
168 map_encoding = encoding;
169 }
170
171 if (map_to_utf8 != (iconv_t)-1)
172 {
173 srclen = strlen(src);
174 outBytesLeft = maxout - 1;
175 bytes = (int)iconv(map_to_utf8, (char **)&src, &srclen,
176 (char **)&destptr, &outBytesLeft);
177 *destptr = '\0';
178
179 _cupsMutexUnlock(&map_mutex);
180
181 return ((int)(destptr - dest));
182 }
183
184 _cupsMutexUnlock(&map_mutex);
185 #endif /* HAVE_ICONV_H */
186
187 /*
188 * No iconv() support, so error out...
189 */
190
191 *destptr = '\0';
192
193 return (-1);
194 }
195
196
197 /*
198 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
199 */
200
201 int /* O - Count or -1 on error */
202 cupsUTF8ToCharset(
203 char *dest, /* O - Target string */
204 const cups_utf8_t *src, /* I - Source string */
205 const int maxout, /* I - Max output */
206 const cups_encoding_t encoding) /* I - Encoding */
207 {
208 char *destptr; /* Pointer into destination */
209 int bytes; /* Number of bytes converted */
210 size_t srclen, /* Length of source string */
211 outBytesLeft; /* Bytes remaining in output buffer */
212
213
214 /*
215 * Check for valid arguments...
216 */
217
218 if (!dest || !src || maxout < 1)
219 {
220 if (dest)
221 *dest = '\0';
222
223 return (-1);
224 }
225
226 /*
227 * Handle identity conversions...
228 */
229
230 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
231 encoding >= CUPS_ENCODING_VBCS_END)
232 {
233 strlcpy(dest, (char *)src, maxout);
234 return ((int)strlen(dest));
235 }
236
237 /*
238 * Handle UTF-8 to ISO-8859-1 directly...
239 */
240
241 destptr = dest;
242
243 if (encoding == CUPS_ISO8859_1)
244 {
245 int ch; /* Character from string */
246 char *destend; /* End of ISO-8859-1 buffer */
247
248
249 destend = dest + maxout - 1;
250
251 while (*src && destptr < destend)
252 {
253 ch = *src++;
254
255 if ((ch & 0xe0) == 0xc0)
256 {
257 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
258
259 if (ch < 256)
260 *destptr++ = ch;
261 else
262 *destptr++ = '?';
263 }
264 else if ((ch & 0xf0) == 0xe0 ||
265 (ch & 0xf8) == 0xf0)
266 *destptr++ = '?';
267 else if (!(ch & 0x80))
268 *destptr++ = ch;
269 }
270
271 *destptr = '\0';
272
273 return ((int)(destptr - dest));
274 }
275
276 #ifdef HAVE_ICONV_H
277 /*
278 * Convert input UTF-8 to legacy charset...
279 */
280
281 _cupsMutexLock(&map_mutex);
282
283 if (map_encoding != encoding)
284 {
285 _cupsCharmapFlush();
286
287 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
288 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
289 map_encoding = encoding;
290 }
291
292 if (map_from_utf8 != (iconv_t)-1)
293 {
294 srclen = strlen((char *)src);
295 outBytesLeft = maxout - 1;
296 bytes = (int)iconv(map_from_utf8, (char **)&src, &srclen,
297 &destptr, &outBytesLeft);
298 *destptr = '\0';
299
300 _cupsMutexUnlock(&map_mutex);
301
302 return ((int)(destptr - dest));
303 }
304
305 _cupsMutexUnlock(&map_mutex);
306 #endif /* HAVE_ICONV_H */
307
308 /*
309 * No iconv() support, so error out...
310 */
311
312 *destptr = '\0';
313
314 return (-1);
315 }
316
317
318 /*
319 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
320 *
321 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
322 *
323 * UTF-32 char UTF-8 char(s)
324 * --------------------------------------------------
325 * 0 to 127 = 0xxxxxxx (US-ASCII)
326 * 128 to 2047 = 110xxxxx 10yyyyyy
327 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
328 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
329 *
330 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
331 * which would convert to five- or six-octet UTF-8 sequences...
332 */
333
334 int /* O - Count or -1 on error */
335 cupsUTF8ToUTF32(
336 cups_utf32_t *dest, /* O - Target string */
337 const cups_utf8_t *src, /* I - Source string */
338 const int maxout) /* I - Max output */
339 {
340 int i; /* Looping variable */
341 cups_utf8_t ch; /* Character value */
342 cups_utf8_t next; /* Next character value */
343 cups_utf32_t ch32; /* UTF-32 character value */
344
345
346 /*
347 * Check for valid arguments and clear output...
348 */
349
350 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
351 src, maxout));
352
353 if (dest)
354 *dest = 0;
355
356 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
357 {
358 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
359
360 return (-1);
361 }
362
363 /*
364 * Convert input UTF-8 to output UTF-32...
365 */
366
367 for (i = maxout - 1; *src && i > 0; i --)
368 {
369 ch = *src++;
370
371 /*
372 * Convert UTF-8 character(s) to UTF-32 character...
373 */
374
375 if (!(ch & 0x80))
376 {
377 /*
378 * One-octet UTF-8 <= 127 (US-ASCII)...
379 */
380
381 *dest++ = ch;
382
383 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
384 continue;
385 }
386 else if ((ch & 0xe0) == 0xc0)
387 {
388 /*
389 * Two-octet UTF-8 <= 2047 (Latin-x)...
390 */
391
392 next = *src++;
393 if ((next & 0xc0) != 0x80)
394 {
395 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
396
397 return (-1);
398 }
399
400 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
401
402 /*
403 * Check for non-shortest form (invalid UTF-8)...
404 */
405
406 if (ch32 < 0x80)
407 {
408 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
409
410 return (-1);
411 }
412
413 *dest++ = ch32;
414
415 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
416 src[-2], src[-1], (unsigned)ch32));
417 }
418 else if ((ch & 0xf0) == 0xe0)
419 {
420 /*
421 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
422 */
423
424 next = *src++;
425 if ((next & 0xc0) != 0x80)
426 {
427 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
428
429 return (-1);
430 }
431
432 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
433
434 next = *src++;
435 if ((next & 0xc0) != 0x80)
436 {
437 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
438
439 return (-1);
440 }
441
442 ch32 = (ch32 << 6) | (next & 0x3f);
443
444 /*
445 * Check for non-shortest form (invalid UTF-8)...
446 */
447
448 if (ch32 < 0x800)
449 {
450 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
451
452 return (-1);
453 }
454
455 *dest++ = ch32;
456
457 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
458 src[-3], src[-2], src[-1], (unsigned)ch32));
459 }
460 else if ((ch & 0xf8) == 0xf0)
461 {
462 /*
463 * Four-octet UTF-8...
464 */
465
466 next = *src++;
467 if ((next & 0xc0) != 0x80)
468 {
469 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
470
471 return (-1);
472 }
473
474 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
475
476 next = *src++;
477 if ((next & 0xc0) != 0x80)
478 {
479 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
480
481 return (-1);
482 }
483
484 ch32 = (ch32 << 6) | (next & 0x3f);
485
486 next = *src++;
487 if ((next & 0xc0) != 0x80)
488 {
489 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
490
491 return (-1);
492 }
493
494 ch32 = (ch32 << 6) | (next & 0x3f);
495
496 /*
497 * Check for non-shortest form (invalid UTF-8)...
498 */
499
500 if (ch32 < 0x10000)
501 {
502 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
503
504 return (-1);
505 }
506
507 *dest++ = ch32;
508
509 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
510 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
511 }
512 else
513 {
514 /*
515 * More than 4-octet (invalid UTF-8 sequence)...
516 */
517
518 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
519
520 return (-1);
521 }
522
523 /*
524 * Check for UTF-16 surrogate (illegal UTF-8)...
525 */
526
527 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
528 return (-1);
529 }
530
531 *dest = 0;
532
533 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
534
535 return (maxout - 1 - i);
536 }
537
538
539 /*
540 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
541 *
542 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
543 *
544 * UTF-32 char UTF-8 char(s)
545 * --------------------------------------------------
546 * 0 to 127 = 0xxxxxxx (US-ASCII)
547 * 128 to 2047 = 110xxxxx 10yyyyyy
548 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
549 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
550 *
551 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
552 * which would convert to five- or six-octet UTF-8 sequences...
553 */
554
555 int /* O - Count or -1 on error */
556 cupsUTF32ToUTF8(
557 cups_utf8_t *dest, /* O - Target string */
558 const cups_utf32_t *src, /* I - Source string */
559 const int maxout) /* I - Max output */
560 {
561 cups_utf8_t *start; /* Start of destination string */
562 int i; /* Looping variable */
563 int swap; /* Byte-swap input to output */
564 cups_utf32_t ch; /* Character value */
565
566
567 /*
568 * Check for valid arguments and clear output...
569 */
570
571 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
572 maxout));
573
574 if (dest)
575 *dest = '\0';
576
577 if (!dest || !src || maxout < 1)
578 {
579 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
580
581 return (-1);
582 }
583
584 /*
585 * Check for leading BOM in UTF-32 and inverted BOM...
586 */
587
588 start = dest;
589 swap = *src == 0xfffe0000;
590
591 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
592
593 if (*src == 0xfffe0000 || *src == 0xfeff)
594 src ++;
595
596 /*
597 * Convert input UTF-32 to output UTF-8...
598 */
599
600 for (i = maxout - 1; *src && i > 0;)
601 {
602 ch = *src++;
603
604 /*
605 * Byte swap input UTF-32, if necessary...
606 * (only byte-swapping 24 of 32 bits)
607 */
608
609 if (swap)
610 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
611
612 /*
613 * Check for beyond Plane 16 (invalid UTF-32)...
614 */
615
616 if (ch > 0x10ffff)
617 {
618 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
619
620 return (-1);
621 }
622
623 /*
624 * Convert UTF-32 character to UTF-8 character(s)...
625 */
626
627 if (ch < 0x80)
628 {
629 /*
630 * One-octet UTF-8 <= 127 (US-ASCII)...
631 */
632
633 *dest++ = (cups_utf8_t)ch;
634 i --;
635
636 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
637 }
638 else if (ch < 0x800)
639 {
640 /*
641 * Two-octet UTF-8 <= 2047 (Latin-x)...
642 */
643
644 if (i < 2)
645 {
646 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
647
648 return (-1);
649 }
650
651 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
652 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
653 i -= 2;
654
655 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
656 dest[-2], dest[-1]));
657 }
658 else if (ch < 0x10000)
659 {
660 /*
661 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
662 */
663
664 if (i < 3)
665 {
666 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
667
668 return (-1);
669 }
670
671 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
672 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
673 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
674 i -= 3;
675
676 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
677 dest[-3], dest[-2], dest[-1]));
678 }
679 else
680 {
681 /*
682 * Four-octet UTF-8...
683 */
684
685 if (i < 4)
686 {
687 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
688
689 return (-1);
690 }
691
692 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
693 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
694 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
695 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
696 i -= 4;
697
698 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
699 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
700 }
701 }
702
703 *dest = '\0';
704
705 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
706
707 return ((int)(dest - start));
708 }
709
710
711 /*
712 * 'compare_wide()' - Compare key for wide (VBCS) match.
713 */
714
715 static int
716 compare_wide(const void *k1, /* I - Key char */
717 const void *k2) /* I - Map char */
718 {
719 cups_vbcs_t key; /* Legacy key character */
720 cups_vbcs_t map; /* Legacy map character */
721
722
723 key = *((cups_vbcs_t *)k1);
724 map = ((_cups_wide2uni_t *)k2)->widechar;
725
726 return ((int)(key - map));
727 }
728
729
730 /*
731 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
732 */
733
734 static int /* O - Count or -1 on error */
735 conv_sbcs_to_utf8(
736 cups_utf8_t *dest, /* O - Target string */
737 const cups_sbcs_t *src, /* I - Source string */
738 int maxout, /* I - Max output */
739 const cups_encoding_t encoding) /* I - Encoding */
740 {
741 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
742 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
743 cups_sbcs_t legchar; /* Legacy character value */
744 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
745 *workptr; /* Pointer into string */
746
747
748 /*
749 * Find legacy charset map in cache...
750 */
751
752 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
753 return (-1);
754
755 /*
756 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
757 */
758
759 work[0] = 0xfeff;
760 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
761 {
762 legchar = *src++;
763
764 /*
765 * Convert ASCII verbatim (optimization)...
766 */
767
768 if (legchar < 0x80)
769 *workptr++ = (cups_utf32_t)legchar;
770 else
771 {
772 /*
773 * Convert unknown character to Replacement Character...
774 */
775
776 crow = cmap->char2uni + legchar;
777
778 if (!*crow)
779 *workptr++ = 0xfffd;
780 else
781 *workptr++ = (cups_utf32_t)*crow;
782 }
783 }
784
785 *workptr = 0;
786
787 /*
788 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
789 */
790
791 cmap->used --;
792
793 return (cupsUTF32ToUTF8(dest, work, maxout));
794 }
795
796
797 /*
798 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
799 */
800
801 static int /* O - Count or -1 on error */
802 conv_utf8_to_sbcs(
803 cups_sbcs_t *dest, /* O - Target string */
804 const cups_utf8_t *src, /* I - Source string */
805 int maxout, /* I - Max output */
806 const cups_encoding_t encoding) /* I - Encoding */
807 {
808 cups_sbcs_t *start; /* Start of destination string */
809 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
810 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
811 cups_utf32_t unichar; /* Character value */
812 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
813 *workptr; /* Pointer into string */
814
815
816 /*
817 * Find legacy charset map in cache...
818 */
819
820 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
821 return (-1);
822
823 /*
824 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
825 */
826
827 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
828 return (-1);
829
830 /*
831 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
832 */
833
834 for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
835 {
836 unichar = *workptr++;
837 if (!unichar)
838 break;
839
840 /*
841 * Convert ASCII verbatim (optimization)...
842 */
843
844 if (unichar < 0x80)
845 {
846 *dest++ = (cups_sbcs_t)unichar;
847 continue;
848 }
849
850 /*
851 * Convert unknown character to visible replacement...
852 */
853
854 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
855
856 if (srow)
857 srow += (int)(unichar & 0xff);
858
859 if (!srow || !*srow)
860 *dest++ = '?';
861 else
862 *dest++ = *srow;
863 }
864
865 *dest = '\0';
866
867 cmap->used --;
868
869 return ((int)(dest - start));
870 }
871
872
873 /*
874 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
875 */
876
877 static int /* O - Count or -1 on error */
878 conv_utf8_to_vbcs(
879 cups_sbcs_t *dest, /* O - Target string */
880 const cups_utf8_t *src, /* I - Source string */
881 int maxout, /* I - Max output */
882 const cups_encoding_t encoding) /* I - Encoding */
883 {
884 cups_sbcs_t *start; /* Start of destination string */
885 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
886 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
887 cups_utf32_t unichar; /* Character value */
888 cups_vbcs_t legchar; /* Legacy character value */
889 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
890 *workptr; /* Pointer into string */
891
892
893 DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
894 "encoding=%d)", dest, src, maxout, encoding));
895
896 /*
897 * Find legacy charset map in cache...
898 */
899
900 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
901 {
902 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
903
904 return (-1);
905 }
906
907 /*
908 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
909 */
910
911 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
912 {
913 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
914
915 return (-1);
916 }
917
918 /*
919 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
920 */
921
922 for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
923 {
924 unichar = *workptr++;
925
926 /*
927 * Convert ASCII verbatim (optimization)...
928 */
929
930 if (unichar < 0x80)
931 {
932 *dest++ = (cups_sbcs_t)unichar;
933
934 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
935 dest[-1]));
936
937 continue;
938 }
939
940 /*
941 * Convert unknown character to visible replacement...
942 */
943
944 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
945
946 if (vrow)
947 vrow += (int)(unichar & 0xff);
948
949 if (!vrow || !*vrow)
950 legchar = (cups_vbcs_t)'?';
951 else
952 legchar = (cups_vbcs_t)*vrow;
953
954 /*
955 * Save n-byte legacy character...
956 */
957
958 if (legchar > 0xffffff)
959 {
960 if (maxout < 5)
961 {
962 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
963
964 return (-1);
965 }
966
967 *dest++ = (cups_sbcs_t)(legchar >> 24);
968 *dest++ = (cups_sbcs_t)(legchar >> 16);
969 *dest++ = (cups_sbcs_t)(legchar >> 8);
970 *dest++ = (cups_sbcs_t)legchar;
971
972 maxout -= 3;
973
974 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
975 (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
976 }
977 else if (legchar > 0xffff)
978 {
979 if (maxout < 4)
980 {
981 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
982
983 return (-1);
984 }
985
986 *dest++ = (cups_sbcs_t)(legchar >> 16);
987 *dest++ = (cups_sbcs_t)(legchar >> 8);
988 *dest++ = (cups_sbcs_t)legchar;
989
990 maxout -= 2;
991
992 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
993 (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
994 }
995 else if (legchar > 0xff)
996 {
997 *dest++ = (cups_sbcs_t)(legchar >> 8);
998 *dest++ = (cups_sbcs_t)legchar;
999
1000 maxout --;
1001
1002 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1003 (unsigned)unichar, dest[-2], dest[-1]));
1004 }
1005 else
1006 {
1007 *dest++ = (cups_sbcs_t)legchar;
1008
1009 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1010 (unsigned)unichar, dest[-1]));
1011 }
1012 }
1013
1014 *dest = '\0';
1015
1016 vmap->used --;
1017
1018 DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1019 (int)(dest - start)));
1020
1021 return ((int)(dest - start));
1022 }
1023
1024
1025 /*
1026 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1027 */
1028
1029 static int /* O - Count or -1 on error */
1030 conv_vbcs_to_utf8(
1031 cups_utf8_t *dest, /* O - Target string */
1032 const cups_sbcs_t *src, /* I - Source string */
1033 int maxout, /* I - Max output */
1034 const cups_encoding_t encoding) /* I - Encoding */
1035 {
1036 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1037 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1038 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1039 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1040 cups_vbcs_t legchar; /* Legacy character value */
1041 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1042 *workptr; /* Pointer into string */
1043
1044
1045 /*
1046 * Find legacy charset map in cache...
1047 */
1048
1049 DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1050 dest, src, maxout, encoding));
1051
1052 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1053 {
1054 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1055
1056 return (-1);
1057 }
1058
1059 /*
1060 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1061 */
1062
1063 work[0] = 0xfeff;
1064 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1065 {
1066 legchar = *src++;
1067 leadchar = (cups_sbcs_t)legchar;
1068
1069 /*
1070 * Convert ASCII verbatim (optimization)...
1071 */
1072
1073 if (legchar < 0x80)
1074 {
1075 *workptr++ = (cups_utf32_t)legchar;
1076
1077 DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1078 (unsigned)legchar));
1079 continue;
1080 }
1081
1082 /*
1083 * Convert 2-byte legacy character...
1084 */
1085
1086 if (vmap->lead2char[(int)leadchar] == leadchar)
1087 {
1088 if (!*src)
1089 {
1090 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1091
1092 return (-1);
1093 }
1094
1095 legchar = (legchar << 8) | *src++;
1096
1097 /*
1098 * Convert unknown character to Replacement Character...
1099 */
1100
1101 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1102 if (crow)
1103 crow += (int) (legchar & 0xff);
1104
1105 if (!crow || !*crow)
1106 *workptr++ = 0xfffd;
1107 else
1108 *workptr++ = (cups_utf32_t)*crow;
1109
1110 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1111 src[-2], src[-1], (unsigned)workptr[-1]));
1112 continue;
1113 }
1114
1115 /*
1116 * Fetch 3-byte or 4-byte legacy character...
1117 */
1118
1119 if (vmap->lead3char[(int)leadchar] == leadchar)
1120 {
1121 if (!*src || !src[1])
1122 {
1123 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1124
1125 return (-1);
1126 }
1127
1128 legchar = (legchar << 8) | *src++;
1129 legchar = (legchar << 8) | *src++;
1130 }
1131 else if (vmap->lead4char[(int)leadchar] == leadchar)
1132 {
1133 if (!*src || !src[1] || !src[2])
1134 {
1135 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1136
1137 return (-1);
1138 }
1139
1140 legchar = (legchar << 8) | *src++;
1141 legchar = (legchar << 8) | *src++;
1142 legchar = (legchar << 8) | *src++;
1143 }
1144 else
1145 {
1146 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1147
1148 return (-1);
1149 }
1150
1151 /*
1152 * Find 3-byte or 4-byte legacy character...
1153 */
1154
1155 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1156 vmap->wide2uni,
1157 vmap->widecount,
1158 sizeof(_cups_wide2uni_t),
1159 compare_wide);
1160
1161 /*
1162 * Convert unknown character to Replacement Character...
1163 */
1164
1165 if (!wide2uni || !wide2uni->unichar)
1166 *workptr++ = 0xfffd;
1167 else
1168 *workptr++ = wide2uni->unichar;
1169
1170 if (vmap->lead3char[(int)leadchar] == leadchar)
1171 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1172 src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1173 else
1174 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1175 src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1176 }
1177
1178 *workptr = 0;
1179
1180 vmap->used --;
1181
1182 DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1183 (int)(workptr - work)));
1184
1185 /*
1186 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1187 */
1188
1189 return (cupsUTF32ToUTF8(dest, work, maxout));
1190 }
1191
1192
1193 /*
1194 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1195 */
1196
1197 static void
1198 free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
1199 {
1200 int i; /* Looping variable */
1201
1202
1203 for (i = 0; i < 256; i ++)
1204 if (cmap->uni2char[i])
1205 free(cmap->uni2char[i]);
1206
1207 free(cmap);
1208 }
1209
1210
1211 /*
1212 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1213 */
1214
1215 static void
1216 free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1217 {
1218 int i; /* Looping variable */
1219
1220
1221 for (i = 0; i < 256; i ++)
1222 if (vmap->char2uni[i])
1223 free(vmap->char2uni[i]);
1224
1225 for (i = 0; i < 256; i ++)
1226 if (vmap->uni2char[i])
1227 free(vmap->uni2char[i]);
1228
1229 if (vmap->wide2uni)
1230 free(vmap->wide2uni);
1231
1232 free(vmap);
1233 }
1234
1235
1236 /*
1237 * 'get_charmap()' - Lookup or get a character set map (private).
1238 *
1239 * This code handles single-byte (SBCS), double-byte (DBCS), and
1240 * variable-byte (VBCS) character sets _without_ charset escapes...
1241 * This code does not handle multiple-byte character sets (MBCS)
1242 * (such as ISO-2022-JP) with charset switching via escapes...
1243 */
1244
1245
1246 static void * /* O - Charset map pointer */
1247 get_charmap(
1248 const cups_encoding_t encoding) /* I - Encoding */
1249 {
1250 char filename[1024]; /* Filename for charset map file */
1251 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1252
1253
1254 DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1255
1256 /*
1257 * Get the data directory and charset map name...
1258 */
1259
1260 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1261 cg->cups_datadir, _cupsEncodingName(encoding));
1262
1263 DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1264
1265 /*
1266 * Read charset map input file into cache...
1267 */
1268
1269 if (encoding < CUPS_ENCODING_SBCS_END)
1270 return (get_sbcs_charmap(encoding, filename));
1271 else if (encoding < CUPS_ENCODING_VBCS_END)
1272 return (get_vbcs_charmap(encoding, filename));
1273 else
1274 return (NULL);
1275 }
1276
1277
1278 /*
1279 * 'get_charmap_count()' - Count lines in a charmap file.
1280 */
1281
1282 static int /* O - Count or -1 on error */
1283 get_charmap_count(cups_file_t *fp) /* I - File to read from */
1284 {
1285 int count; /* Number of lines */
1286 char line[256]; /* Line from input map file */
1287
1288
1289 /*
1290 * Count lines in map input file...
1291 */
1292
1293 count = 0;
1294
1295 while (cupsFileGets(fp, line, sizeof(line)))
1296 if (line[0] == '0')
1297 count ++;
1298
1299 /*
1300 * Return the number of lines...
1301 */
1302
1303 if (count > 0)
1304 return (count);
1305 else
1306 return (-1);
1307 }
1308
1309
1310 /*
1311 * 'get_sbcs_charmap()' - Get SBCS Charmap.
1312 */
1313
1314 static _cups_cmap_t * /* O - Charmap or 0 on error */
1315 get_sbcs_charmap(
1316 const cups_encoding_t encoding, /* I - Charmap Encoding */
1317 const char *filename) /* I - Charmap Filename */
1318 {
1319 unsigned long legchar; /* Legacy character value */
1320 cups_utf32_t unichar; /* Unicode character value */
1321 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1322 cups_file_t *fp; /* Charset map file pointer */
1323 char *s; /* Line parsing pointer */
1324 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1325 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1326 char line[256]; /* Line from charset map file */
1327
1328
1329 /*
1330 * See if we already have this SBCS charset map loaded...
1331 */
1332
1333 DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1334 filename));
1335
1336 for (cmap = cmap_cache; cmap; cmap = cmap->next)
1337 {
1338 if (cmap->encoding == encoding)
1339 {
1340 cmap->used ++;
1341 DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1342
1343 return ((void *)cmap);
1344 }
1345 }
1346
1347 /*
1348 * Open SBCS charset map input file...
1349 */
1350
1351 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1352 {
1353 DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1354
1355 return (NULL);
1356 }
1357
1358 /*
1359 * Allocate memory for SBCS charset map...
1360 */
1361
1362 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1363 {
1364 cupsFileClose(fp);
1365 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1366
1367 return (NULL);
1368 }
1369
1370 cmap->used ++;
1371 cmap->encoding = encoding;
1372
1373 /*
1374 * Save SBCS charset map into memory for transcoding...
1375 */
1376
1377 while (cupsFileGets(fp, line, sizeof(line)))
1378 {
1379 if (line[0] != '0')
1380 continue;
1381
1382 legchar = strtol(line, &s, 16);
1383 if (legchar < 0 || legchar > 0xff)
1384 goto sbcs_error;
1385
1386 unichar = strtol(s, NULL, 16);
1387 if (unichar < 0 || unichar > 0x10ffff)
1388 goto sbcs_error;
1389
1390 /*
1391 * Save legacy to Unicode mapping in direct lookup table...
1392 */
1393
1394 crow = cmap->char2uni + legchar;
1395 *crow = (cups_ucs2_t)(unichar & 0xffff);
1396
1397 /*
1398 * Save Unicode to legacy mapping in indirect lookup table...
1399 */
1400
1401 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1402 if (!srow)
1403 {
1404 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1405 if (!srow)
1406 goto sbcs_error;
1407
1408 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1409 }
1410
1411 srow += unichar & 0xff;
1412
1413 /*
1414 * Convert Replacement Character to visible replacement...
1415 */
1416
1417 if (unichar == 0xfffd)
1418 legchar = (unsigned long)'?';
1419
1420 /*
1421 * First (oldest) legacy character uses Unicode mapping cell...
1422 */
1423
1424 if (!*srow)
1425 *srow = (cups_sbcs_t)legchar;
1426 }
1427
1428 cupsFileClose(fp);
1429
1430 /*
1431 * Add it to the cache and return...
1432 */
1433
1434 cmap->next = cmap_cache;
1435 cmap_cache = cmap;
1436
1437 DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1438
1439 return (cmap);
1440
1441 /*
1442 * If we get here, there was an error in the cmap file...
1443 */
1444
1445 sbcs_error:
1446
1447 free_sbcs_charmap(cmap);
1448
1449 cupsFileClose(fp);
1450
1451 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1452
1453 return (NULL);
1454 }
1455
1456
1457 /*
1458 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1459 */
1460
1461 static _cups_vmap_t * /* O - Charmap or 0 on error */
1462 get_vbcs_charmap(
1463 const cups_encoding_t encoding, /* I - Charmap Encoding */
1464 const char *filename) /* I - Charmap Filename */
1465 {
1466 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1467 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1468 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1469 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1470 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1471 unsigned long legchar; /* Legacy character value */
1472 cups_utf32_t unichar; /* Unicode character value */
1473 int mapcount; /* Count of lines in charmap file */
1474 cups_file_t *fp; /* Charset map file pointer */
1475 char *s; /* Line parsing pointer */
1476 char line[256]; /* Line from charset map file */
1477 int i; /* Loop variable */
1478 int legacy; /* 32-bit legacy char */
1479
1480
1481 DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1482 encoding, filename));
1483
1484 /*
1485 * See if we already have this DBCS/VBCS charset map loaded...
1486 */
1487
1488 for (vmap = vmap_cache; vmap; vmap = vmap->next)
1489 {
1490 if (vmap->encoding == encoding)
1491 {
1492 vmap->used ++;
1493 DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1494
1495 return ((void *)vmap);
1496 }
1497 }
1498
1499 /*
1500 * Open VBCS charset map input file...
1501 */
1502
1503 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1504 {
1505 DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1506
1507 return (NULL);
1508 }
1509
1510 /*
1511 * Count lines in charmap file...
1512 */
1513
1514 if ((mapcount = get_charmap_count(fp)) <= 0)
1515 {
1516 DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1517
1518 cupsFileClose(fp);
1519
1520 return (NULL);
1521 }
1522
1523 DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1524
1525 /*
1526 * Allocate memory for DBCS/VBCS charset map...
1527 */
1528
1529 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1530 {
1531 DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1532
1533 cupsFileClose(fp);
1534
1535 return (NULL);
1536 }
1537
1538 vmap->used ++;
1539 vmap->encoding = encoding;
1540
1541 /*
1542 * Save DBCS/VBCS charset map into memory for transcoding...
1543 */
1544
1545 wide2uni = NULL;
1546
1547 cupsFileRewind(fp);
1548
1549 i = 0;
1550 legacy = 0;
1551
1552 while (cupsFileGets(fp, line, sizeof(line)))
1553 {
1554 if (line[0] != '0')
1555 continue;
1556
1557 legchar = strtoul(line, &s, 16);
1558 if (legchar == ULONG_MAX)
1559 goto vbcs_error;
1560
1561 unichar = strtol(s, NULL, 16);
1562 if (unichar < 0 || unichar > 0x10ffff)
1563 goto vbcs_error;
1564
1565 i ++;
1566
1567 DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1568 legchar, (unsigned)unichar));
1569
1570 /*
1571 * Save lead char of 2/3/4-byte legacy char...
1572 */
1573
1574 if (legchar > 0xffffff)
1575 {
1576 leadchar = (cups_sbcs_t)(legchar >> 24);
1577 vmap->lead4char[leadchar] = leadchar;
1578 }
1579 else if (legchar > 0xffff)
1580 {
1581 leadchar = (cups_sbcs_t)(legchar >> 16);
1582 vmap->lead3char[leadchar] = leadchar;
1583 }
1584 else
1585 {
1586 leadchar = (cups_sbcs_t)(legchar >> 8);
1587 vmap->lead2char[leadchar] = leadchar;
1588 }
1589
1590 /*
1591 * Save Legacy to Unicode mapping...
1592 */
1593
1594 if (legchar <= 0xffff)
1595 {
1596 /*
1597 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1598 */
1599
1600 crow = vmap->char2uni[(int)leadchar];
1601 if (!crow)
1602 {
1603 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1604 if (!crow)
1605 goto vbcs_error;
1606
1607 vmap->char2uni[(int)leadchar] = crow;
1608 }
1609
1610 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1611 }
1612 else
1613 {
1614 /*
1615 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1616 */
1617
1618 if (!legacy)
1619 {
1620 legacy = 1;
1621 vmap->widecount = (mapcount - i + 1);
1622 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1623 sizeof(_cups_wide2uni_t));
1624 if (!wide2uni)
1625 goto vbcs_error;
1626
1627 vmap->wide2uni = wide2uni;
1628 }
1629
1630 wide2uni->widechar = (cups_vbcs_t)legchar;
1631 wide2uni->unichar = (cups_ucs2_t)unichar;
1632 wide2uni ++;
1633 }
1634
1635 /*
1636 * Save Unicode to legacy mapping in indirect lookup table...
1637 */
1638
1639 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1640 if (!vrow)
1641 {
1642 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1643 if (!vrow)
1644 goto vbcs_error;
1645
1646 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1647 }
1648
1649 vrow += (int)(unichar & 0xff);
1650
1651 /*
1652 * Convert Replacement Character to visible replacement...
1653 */
1654
1655 if (unichar == 0xfffd)
1656 legchar = (unsigned long)'?';
1657
1658 /*
1659 * First (oldest) legacy character uses Unicode mapping cell...
1660 */
1661
1662 if (!*vrow)
1663 *vrow = (cups_vbcs_t)legchar;
1664 }
1665
1666 vmap->charcount = (i - vmap->widecount);
1667
1668 cupsFileClose(fp);
1669
1670 /*
1671 * Add it to the cache and return...
1672 */
1673
1674 vmap->next = vmap_cache;
1675 vmap_cache = vmap;
1676
1677 DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1678
1679 return (vmap);
1680
1681 /*
1682 * If we get here, the file contains errors...
1683 */
1684
1685 vbcs_error:
1686
1687 free_vbcs_charmap(vmap);
1688
1689 cupsFileClose(fp);
1690
1691 DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1692
1693 return (NULL);
1694 }
1695
1696
1697 /*
1698 * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1699 */