]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
Merge changes from CUPS 1.5svn-r9214.
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
3 *
4 * Transcoding support for CUPS.
5 *
6 * Copyright 2007-2010 by Apple Inc.
7 * Copyright 1997-2007 by Easy Software Products.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Apple Inc. and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "LICENSE.txt"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at "http://www.cups.org/".
14 *
15 * This file is subject to the Apple OS-Developed Software exception.
16 *
17 * Contents:
18 *
19 * _cupsCharmapFlush() - Flush all character set maps out of cache.
20 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
21 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
22 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
23 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
24 */
25
26 /*
27 * Include necessary headers...
28 */
29
30 #include "cups-private.h"
31 #include <limits.h>
32 #include <time.h>
33 #ifdef HAVE_ICONV_H
34 # include <iconv.h>
35 #endif /* HAVE_ICONV_H */
36
37
38 /*
39 * Local globals...
40 */
41
#ifdef HAVE_ICONV_H
/* Mutex controlling access to the cached conversion maps below */
static _cups_mutex_t    map_mutex = _CUPS_MUTEX_INITIALIZER;

/* Cached descriptor for UTF-8 -> charset, (iconv_t)-1 when not open */
static iconv_t          map_from_utf8 = (iconv_t)-1;

/* Cached descriptor for charset -> UTF-8, (iconv_t)-1 when not open */
static iconv_t          map_to_utf8 = (iconv_t)-1;

/* Charset the two descriptors above were opened for */
static cups_encoding_t  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
52
53
/*
 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
 *
 * Closes whichever of the two cached iconv descriptors are open, marks
 * them as closed, and resets the cached encoding to CUPS_AUTO_ENCODING.
 * The function body is empty when HAVE_ICONV_H is not defined.  It does
 * not lock map_mutex itself.
 */

void
_cupsCharmapFlush(void)
{
#ifdef HAVE_ICONV_H
  iconv_t       *cached[2];             /* Cached descriptors to release */
  int           n;                      /* Looping var */


  cached[0] = &map_from_utf8;
  cached[1] = &map_to_utf8;

  for (n = 0; n < 2; n ++)
  {
    if (*cached[n] == (iconv_t)-1)
      continue;                         /* Not open, nothing to close */

    iconv_close(*cached[n]);
    *cached[n] = (iconv_t)-1;
  }

  map_encoding = CUPS_AUTO_ENCODING;
#endif /* HAVE_ICONV_H */
}
77
78
79 /*
80 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
81 */
82
83 int /* O - Count or -1 on error */
84 cupsCharsetToUTF8(
85 cups_utf8_t *dest, /* O - Target string */
86 const char *src, /* I - Source string */
87 const int maxout, /* I - Max output */
88 const cups_encoding_t encoding) /* I - Encoding */
89 {
90 cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
91 size_t srclen, /* Length of source string */
92 outBytesLeft; /* Bytes remaining in output buffer */
93
94
95 /*
96 * Check for valid arguments...
97 */
98
99 DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
100 dest, src, maxout, encoding));
101
102 if (!dest || !src || maxout < 1)
103 {
104 if (dest)
105 *dest = '\0';
106
107 DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
108 return (-1);
109 }
110
111 /*
112 * Handle identity conversions...
113 */
114
115 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
116 encoding >= CUPS_ENCODING_VBCS_END)
117 {
118 strlcpy((char *)dest, src, maxout);
119 return ((int)strlen((char *)dest));
120 }
121
122 /*
123 * Handle ISO-8859-1 to UTF-8 directly...
124 */
125
126 destptr = dest;
127
128 if (encoding == CUPS_ISO8859_1)
129 {
130 int ch; /* Character from string */
131 cups_utf8_t *destend; /* End of UTF-8 buffer */
132
133
134 destend = dest + maxout - 2;
135
136 while (*src && destptr < destend)
137 {
138 ch = *src++ & 255;
139
140 if (ch & 128)
141 {
142 *destptr++ = 0xc0 | (ch >> 6);
143 *destptr++ = 0x80 | (ch & 0x3f);
144 }
145 else
146 *destptr++ = ch;
147 }
148
149 *destptr = '\0';
150
151 return ((int)(destptr - dest));
152 }
153
154 /*
155 * Convert input legacy charset to UTF-8...
156 */
157
158 #ifdef HAVE_ICONV_H
159 _cupsMutexLock(&map_mutex);
160
161 if (map_encoding != encoding)
162 {
163 _cupsCharmapFlush();
164
165 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
166 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
167 map_encoding = encoding;
168 }
169
170 if (map_to_utf8 != (iconv_t)-1)
171 {
172 srclen = strlen(src);
173 outBytesLeft = maxout - 1;
174
175 iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
176 &outBytesLeft);
177 *destptr = '\0';
178
179 _cupsMutexUnlock(&map_mutex);
180
181 return ((int)(destptr - dest));
182 }
183
184 _cupsMutexUnlock(&map_mutex);
185 #endif /* HAVE_ICONV_H */
186
187 /*
188 * No iconv() support, so error out...
189 */
190
191 *destptr = '\0';
192
193 return (-1);
194 }
195
196
197 /*
198 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
199 */
200
201 int /* O - Count or -1 on error */
202 cupsUTF8ToCharset(
203 char *dest, /* O - Target string */
204 const cups_utf8_t *src, /* I - Source string */
205 const int maxout, /* I - Max output */
206 const cups_encoding_t encoding) /* I - Encoding */
207 {
208 char *destptr; /* Pointer into destination */
209 size_t srclen, /* Length of source string */
210 outBytesLeft; /* Bytes remaining in output buffer */
211
212
213 /*
214 * Check for valid arguments...
215 */
216
217 if (!dest || !src || maxout < 1)
218 {
219 if (dest)
220 *dest = '\0';
221
222 return (-1);
223 }
224
225 /*
226 * Handle identity conversions...
227 */
228
229 if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
230 encoding >= CUPS_ENCODING_VBCS_END)
231 {
232 strlcpy(dest, (char *)src, maxout);
233 return ((int)strlen(dest));
234 }
235
236 /*
237 * Handle UTF-8 to ISO-8859-1 directly...
238 */
239
240 destptr = dest;
241
242 if (encoding == CUPS_ISO8859_1)
243 {
244 int ch; /* Character from string */
245 char *destend; /* End of ISO-8859-1 buffer */
246
247
248 destend = dest + maxout - 1;
249
250 while (*src && destptr < destend)
251 {
252 ch = *src++;
253
254 if ((ch & 0xe0) == 0xc0)
255 {
256 ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
257
258 if (ch < 256)
259 *destptr++ = ch;
260 else
261 *destptr++ = '?';
262 }
263 else if ((ch & 0xf0) == 0xe0 ||
264 (ch & 0xf8) == 0xf0)
265 *destptr++ = '?';
266 else if (!(ch & 0x80))
267 *destptr++ = ch;
268 }
269
270 *destptr = '\0';
271
272 return ((int)(destptr - dest));
273 }
274
275 #ifdef HAVE_ICONV_H
276 /*
277 * Convert input UTF-8 to legacy charset...
278 */
279
280 _cupsMutexLock(&map_mutex);
281
282 if (map_encoding != encoding)
283 {
284 _cupsCharmapFlush();
285
286 map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
287 map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
288 map_encoding = encoding;
289 }
290
291 if (map_from_utf8 != (iconv_t)-1)
292 {
293 srclen = strlen((char *)src);
294 outBytesLeft = maxout - 1;
295
296 iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
297 *destptr = '\0';
298
299 _cupsMutexUnlock(&map_mutex);
300
301 return ((int)(destptr - dest));
302 }
303
304 _cupsMutexUnlock(&map_mutex);
305 #endif /* HAVE_ICONV_H */
306
307 /*
308 * No iconv() support, so error out...
309 */
310
311 *destptr = '\0';
312
313 return (-1);
314 }
315
316
317 /*
318 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
319 *
320 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
321 *
322 * UTF-32 char UTF-8 char(s)
323 * --------------------------------------------------
324 * 0 to 127 = 0xxxxxxx (US-ASCII)
325 * 128 to 2047 = 110xxxxx 10yyyyyy
326 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
327 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
328 *
329 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
330 * which would convert to five- or six-octet UTF-8 sequences...
331 */
332
333 int /* O - Count or -1 on error */
334 cupsUTF8ToUTF32(
335 cups_utf32_t *dest, /* O - Target string */
336 const cups_utf8_t *src, /* I - Source string */
337 const int maxout) /* I - Max output */
338 {
339 int i; /* Looping variable */
340 cups_utf8_t ch; /* Character value */
341 cups_utf8_t next; /* Next character value */
342 cups_utf32_t ch32; /* UTF-32 character value */
343
344
345 /*
346 * Check for valid arguments and clear output...
347 */
348
349 DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
350 src, maxout));
351
352 if (dest)
353 *dest = 0;
354
355 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
356 {
357 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
358
359 return (-1);
360 }
361
362 /*
363 * Convert input UTF-8 to output UTF-32...
364 */
365
366 for (i = maxout - 1; *src && i > 0; i --)
367 {
368 ch = *src++;
369
370 /*
371 * Convert UTF-8 character(s) to UTF-32 character...
372 */
373
374 if (!(ch & 0x80))
375 {
376 /*
377 * One-octet UTF-8 <= 127 (US-ASCII)...
378 */
379
380 *dest++ = ch;
381
382 DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
383 continue;
384 }
385 else if ((ch & 0xe0) == 0xc0)
386 {
387 /*
388 * Two-octet UTF-8 <= 2047 (Latin-x)...
389 */
390
391 next = *src++;
392 if ((next & 0xc0) != 0x80)
393 {
394 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
395
396 return (-1);
397 }
398
399 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
400
401 /*
402 * Check for non-shortest form (invalid UTF-8)...
403 */
404
405 if (ch32 < 0x80)
406 {
407 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
408
409 return (-1);
410 }
411
412 *dest++ = ch32;
413
414 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
415 src[-2], src[-1], (unsigned)ch32));
416 }
417 else if ((ch & 0xf0) == 0xe0)
418 {
419 /*
420 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
421 */
422
423 next = *src++;
424 if ((next & 0xc0) != 0x80)
425 {
426 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
427
428 return (-1);
429 }
430
431 ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
432
433 next = *src++;
434 if ((next & 0xc0) != 0x80)
435 {
436 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
437
438 return (-1);
439 }
440
441 ch32 = (ch32 << 6) | (next & 0x3f);
442
443 /*
444 * Check for non-shortest form (invalid UTF-8)...
445 */
446
447 if (ch32 < 0x800)
448 {
449 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
450
451 return (-1);
452 }
453
454 *dest++ = ch32;
455
456 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
457 src[-3], src[-2], src[-1], (unsigned)ch32));
458 }
459 else if ((ch & 0xf8) == 0xf0)
460 {
461 /*
462 * Four-octet UTF-8...
463 */
464
465 next = *src++;
466 if ((next & 0xc0) != 0x80)
467 {
468 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
469
470 return (-1);
471 }
472
473 ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
474
475 next = *src++;
476 if ((next & 0xc0) != 0x80)
477 {
478 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
479
480 return (-1);
481 }
482
483 ch32 = (ch32 << 6) | (next & 0x3f);
484
485 next = *src++;
486 if ((next & 0xc0) != 0x80)
487 {
488 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
489
490 return (-1);
491 }
492
493 ch32 = (ch32 << 6) | (next & 0x3f);
494
495 /*
496 * Check for non-shortest form (invalid UTF-8)...
497 */
498
499 if (ch32 < 0x10000)
500 {
501 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
502
503 return (-1);
504 }
505
506 *dest++ = ch32;
507
508 DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
509 src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
510 }
511 else
512 {
513 /*
514 * More than 4-octet (invalid UTF-8 sequence)...
515 */
516
517 DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
518
519 return (-1);
520 }
521
522 /*
523 * Check for UTF-16 surrogate (illegal UTF-8)...
524 */
525
526 if (ch32 >= 0xd800 && ch32 <= 0xdfff)
527 return (-1);
528 }
529
530 *dest = 0;
531
532 DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
533
534 return (maxout - 1 - i);
535 }
536
537
538 /*
539 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
540 *
541 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
542 *
543 * UTF-32 char UTF-8 char(s)
544 * --------------------------------------------------
545 * 0 to 127 = 0xxxxxxx (US-ASCII)
546 * 128 to 2047 = 110xxxxx 10yyyyyy
547 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
548 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
549 *
550 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
551 * which would convert to five- or six-octet UTF-8 sequences...
552 */
553
554 int /* O - Count or -1 on error */
555 cupsUTF32ToUTF8(
556 cups_utf8_t *dest, /* O - Target string */
557 const cups_utf32_t *src, /* I - Source string */
558 const int maxout) /* I - Max output */
559 {
560 cups_utf8_t *start; /* Start of destination string */
561 int i; /* Looping variable */
562 int swap; /* Byte-swap input to output */
563 cups_utf32_t ch; /* Character value */
564
565
566 /*
567 * Check for valid arguments and clear output...
568 */
569
570 DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
571 maxout));
572
573 if (dest)
574 *dest = '\0';
575
576 if (!dest || !src || maxout < 1)
577 {
578 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
579
580 return (-1);
581 }
582
583 /*
584 * Check for leading BOM in UTF-32 and inverted BOM...
585 */
586
587 start = dest;
588 swap = *src == 0xfffe0000;
589
590 DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
591
592 if (*src == 0xfffe0000 || *src == 0xfeff)
593 src ++;
594
595 /*
596 * Convert input UTF-32 to output UTF-8...
597 */
598
599 for (i = maxout - 1; *src && i > 0;)
600 {
601 ch = *src++;
602
603 /*
604 * Byte swap input UTF-32, if necessary...
605 * (only byte-swapping 24 of 32 bits)
606 */
607
608 if (swap)
609 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
610
611 /*
612 * Check for beyond Plane 16 (invalid UTF-32)...
613 */
614
615 if (ch > 0x10ffff)
616 {
617 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
618
619 return (-1);
620 }
621
622 /*
623 * Convert UTF-32 character to UTF-8 character(s)...
624 */
625
626 if (ch < 0x80)
627 {
628 /*
629 * One-octet UTF-8 <= 127 (US-ASCII)...
630 */
631
632 *dest++ = (cups_utf8_t)ch;
633 i --;
634
635 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
636 }
637 else if (ch < 0x800)
638 {
639 /*
640 * Two-octet UTF-8 <= 2047 (Latin-x)...
641 */
642
643 if (i < 2)
644 {
645 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
646
647 return (-1);
648 }
649
650 *dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
651 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
652 i -= 2;
653
654 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
655 dest[-2], dest[-1]));
656 }
657 else if (ch < 0x10000)
658 {
659 /*
660 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
661 */
662
663 if (i < 3)
664 {
665 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
666
667 return (-1);
668 }
669
670 *dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
671 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
672 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
673 i -= 3;
674
675 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
676 dest[-3], dest[-2], dest[-1]));
677 }
678 else
679 {
680 /*
681 * Four-octet UTF-8...
682 */
683
684 if (i < 4)
685 {
686 DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
687
688 return (-1);
689 }
690
691 *dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
692 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
693 *dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
694 *dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
695 i -= 4;
696
697 DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
698 (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
699 }
700 }
701
702 *dest = '\0';
703
704 DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
705
706 return ((int)(dest - start));
707 }
708
709
710 /*
711 * 'compare_wide()' - Compare key for wide (VBCS) match.
712 */
713
714 static int
715 compare_wide(const void *k1, /* I - Key char */
716 const void *k2) /* I - Map char */
717 {
718 cups_vbcs_t key; /* Legacy key character */
719 cups_vbcs_t map; /* Legacy map character */
720
721
722 key = *((cups_vbcs_t *)k1);
723 map = ((_cups_wide2uni_t *)k2)->widechar;
724
725 return ((int)(key - map));
726 }
727
728
729 /*
730 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
731 */
732
733 static int /* O - Count or -1 on error */
734 conv_sbcs_to_utf8(
735 cups_utf8_t *dest, /* O - Target string */
736 const cups_sbcs_t *src, /* I - Source string */
737 int maxout, /* I - Max output */
738 const cups_encoding_t encoding) /* I - Encoding */
739 {
740 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
741 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
742 cups_sbcs_t legchar; /* Legacy character value */
743 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
744 *workptr; /* Pointer into string */
745
746
747 /*
748 * Find legacy charset map in cache...
749 */
750
751 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
752 return (-1);
753
754 /*
755 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
756 */
757
758 work[0] = 0xfeff;
759 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
760 {
761 legchar = *src++;
762
763 /*
764 * Convert ASCII verbatim (optimization)...
765 */
766
767 if (legchar < 0x80)
768 *workptr++ = (cups_utf32_t)legchar;
769 else
770 {
771 /*
772 * Convert unknown character to Replacement Character...
773 */
774
775 crow = cmap->char2uni + legchar;
776
777 if (!*crow)
778 *workptr++ = 0xfffd;
779 else
780 *workptr++ = (cups_utf32_t)*crow;
781 }
782 }
783
784 *workptr = 0;
785
786 /*
787 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
788 */
789
790 cmap->used --;
791
792 return (cupsUTF32ToUTF8(dest, work, maxout));
793 }
794
795
796 /*
797 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
798 */
799
800 static int /* O - Count or -1 on error */
801 conv_utf8_to_sbcs(
802 cups_sbcs_t *dest, /* O - Target string */
803 const cups_utf8_t *src, /* I - Source string */
804 int maxout, /* I - Max output */
805 const cups_encoding_t encoding) /* I - Encoding */
806 {
807 cups_sbcs_t *start; /* Start of destination string */
808 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
809 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
810 cups_utf32_t unichar; /* Character value */
811 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
812 *workptr; /* Pointer into string */
813
814
815 /*
816 * Find legacy charset map in cache...
817 */
818
819 if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
820 return (-1);
821
822 /*
823 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
824 */
825
826 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
827 return (-1);
828
829 /*
830 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
831 */
832
833 for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
834 {
835 unichar = *workptr++;
836 if (!unichar)
837 break;
838
839 /*
840 * Convert ASCII verbatim (optimization)...
841 */
842
843 if (unichar < 0x80)
844 {
845 *dest++ = (cups_sbcs_t)unichar;
846 continue;
847 }
848
849 /*
850 * Convert unknown character to visible replacement...
851 */
852
853 srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
854
855 if (srow)
856 srow += (int)(unichar & 0xff);
857
858 if (!srow || !*srow)
859 *dest++ = '?';
860 else
861 *dest++ = *srow;
862 }
863
864 *dest = '\0';
865
866 cmap->used --;
867
868 return ((int)(dest - start));
869 }
870
871
872 /*
873 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
874 */
875
876 static int /* O - Count or -1 on error */
877 conv_utf8_to_vbcs(
878 cups_sbcs_t *dest, /* O - Target string */
879 const cups_utf8_t *src, /* I - Source string */
880 int maxout, /* I - Max output */
881 const cups_encoding_t encoding) /* I - Encoding */
882 {
883 cups_sbcs_t *start; /* Start of destination string */
884 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
885 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
886 cups_utf32_t unichar; /* Character value */
887 cups_vbcs_t legchar; /* Legacy character value */
888 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
889 *workptr; /* Pointer into string */
890
891
892 DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
893 "encoding=%d)", dest, src, maxout, encoding));
894
895 /*
896 * Find legacy charset map in cache...
897 */
898
899 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
900 {
901 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
902
903 return (-1);
904 }
905
906 /*
907 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
908 */
909
910 if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
911 {
912 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
913
914 return (-1);
915 }
916
917 /*
918 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
919 */
920
921 for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
922 {
923 unichar = *workptr++;
924
925 /*
926 * Convert ASCII verbatim (optimization)...
927 */
928
929 if (unichar < 0x80)
930 {
931 *dest++ = (cups_sbcs_t)unichar;
932
933 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
934 dest[-1]));
935
936 continue;
937 }
938
939 /*
940 * Convert unknown character to visible replacement...
941 */
942
943 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
944
945 if (vrow)
946 vrow += (int)(unichar & 0xff);
947
948 if (!vrow || !*vrow)
949 legchar = (cups_vbcs_t)'?';
950 else
951 legchar = (cups_vbcs_t)*vrow;
952
953 /*
954 * Save n-byte legacy character...
955 */
956
957 if (legchar > 0xffffff)
958 {
959 if (maxout < 5)
960 {
961 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
962
963 return (-1);
964 }
965
966 *dest++ = (cups_sbcs_t)(legchar >> 24);
967 *dest++ = (cups_sbcs_t)(legchar >> 16);
968 *dest++ = (cups_sbcs_t)(legchar >> 8);
969 *dest++ = (cups_sbcs_t)legchar;
970
971 maxout -= 3;
972
973 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
974 (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
975 }
976 else if (legchar > 0xffff)
977 {
978 if (maxout < 4)
979 {
980 DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
981
982 return (-1);
983 }
984
985 *dest++ = (cups_sbcs_t)(legchar >> 16);
986 *dest++ = (cups_sbcs_t)(legchar >> 8);
987 *dest++ = (cups_sbcs_t)legchar;
988
989 maxout -= 2;
990
991 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
992 (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
993 }
994 else if (legchar > 0xff)
995 {
996 *dest++ = (cups_sbcs_t)(legchar >> 8);
997 *dest++ = (cups_sbcs_t)legchar;
998
999 maxout --;
1000
1001 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
1002 (unsigned)unichar, dest[-2], dest[-1]));
1003 }
1004 else
1005 {
1006 *dest++ = (cups_sbcs_t)legchar;
1007
1008 DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
1009 (unsigned)unichar, dest[-1]));
1010 }
1011 }
1012
1013 *dest = '\0';
1014
1015 vmap->used --;
1016
1017 DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
1018 (int)(dest - start)));
1019
1020 return ((int)(dest - start));
1021 }
1022
1023
1024 /*
1025 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1026 */
1027
1028 static int /* O - Count or -1 on error */
1029 conv_vbcs_to_utf8(
1030 cups_utf8_t *dest, /* O - Target string */
1031 const cups_sbcs_t *src, /* I - Source string */
1032 int maxout, /* I - Max output */
1033 const cups_encoding_t encoding) /* I - Encoding */
1034 {
1035 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1036 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1037 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1038 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1039 cups_vbcs_t legchar; /* Legacy character value */
1040 cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
1041 *workptr; /* Pointer into string */
1042
1043
1044 /*
1045 * Find legacy charset map in cache...
1046 */
1047
1048 DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
1049 dest, src, maxout, encoding));
1050
1051 if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
1052 {
1053 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
1054
1055 return (-1);
1056 }
1057
1058 /*
1059 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1060 */
1061
1062 work[0] = 0xfeff;
1063 for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
1064 {
1065 legchar = *src++;
1066 leadchar = (cups_sbcs_t)legchar;
1067
1068 /*
1069 * Convert ASCII verbatim (optimization)...
1070 */
1071
1072 if (legchar < 0x80)
1073 {
1074 *workptr++ = (cups_utf32_t)legchar;
1075
1076 DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
1077 (unsigned)legchar));
1078 continue;
1079 }
1080
1081 /*
1082 * Convert 2-byte legacy character...
1083 */
1084
1085 if (vmap->lead2char[(int)leadchar] == leadchar)
1086 {
1087 if (!*src)
1088 {
1089 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
1090
1091 return (-1);
1092 }
1093
1094 legchar = (legchar << 8) | *src++;
1095
1096 /*
1097 * Convert unknown character to Replacement Character...
1098 */
1099
1100 crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
1101 if (crow)
1102 crow += (int) (legchar & 0xff);
1103
1104 if (!crow || !*crow)
1105 *workptr++ = 0xfffd;
1106 else
1107 *workptr++ = (cups_utf32_t)*crow;
1108
1109 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
1110 src[-2], src[-1], (unsigned)workptr[-1]));
1111 continue;
1112 }
1113
1114 /*
1115 * Fetch 3-byte or 4-byte legacy character...
1116 */
1117
1118 if (vmap->lead3char[(int)leadchar] == leadchar)
1119 {
1120 if (!*src || !src[1])
1121 {
1122 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
1123
1124 return (-1);
1125 }
1126
1127 legchar = (legchar << 8) | *src++;
1128 legchar = (legchar << 8) | *src++;
1129 }
1130 else if (vmap->lead4char[(int)leadchar] == leadchar)
1131 {
1132 if (!*src || !src[1] || !src[2])
1133 {
1134 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
1135
1136 return (-1);
1137 }
1138
1139 legchar = (legchar << 8) | *src++;
1140 legchar = (legchar << 8) | *src++;
1141 legchar = (legchar << 8) | *src++;
1142 }
1143 else
1144 {
1145 DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
1146
1147 return (-1);
1148 }
1149
1150 /*
1151 * Find 3-byte or 4-byte legacy character...
1152 */
1153
1154 wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
1155 vmap->wide2uni,
1156 vmap->widecount,
1157 sizeof(_cups_wide2uni_t),
1158 compare_wide);
1159
1160 /*
1161 * Convert unknown character to Replacement Character...
1162 */
1163
1164 if (!wide2uni || !wide2uni->unichar)
1165 *workptr++ = 0xfffd;
1166 else
1167 *workptr++ = wide2uni->unichar;
1168
1169 if (vmap->lead3char[(int)leadchar] == leadchar)
1170 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
1171 src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1172 else
1173 DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
1174 src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
1175 }
1176
1177 *workptr = 0;
1178
1179 vmap->used --;
1180
1181 DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
1182 (int)(workptr - work)));
1183
1184 /*
1185 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1186 */
1187
1188 return (cupsUTF32ToUTF8(dest, work, maxout));
1189 }
1190
1191
1192 /*
1193 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1194 */
1195
1196 static void
1197 free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
1198 {
1199 int i; /* Looping variable */
1200
1201
1202 for (i = 0; i < 256; i ++)
1203 if (cmap->uni2char[i])
1204 free(cmap->uni2char[i]);
1205
1206 free(cmap);
1207 }
1208
1209
1210 /*
1211 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1212 */
1213
1214 static void
1215 free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
1216 {
1217 int i; /* Looping variable */
1218
1219
1220 for (i = 0; i < 256; i ++)
1221 if (vmap->char2uni[i])
1222 free(vmap->char2uni[i]);
1223
1224 for (i = 0; i < 256; i ++)
1225 if (vmap->uni2char[i])
1226 free(vmap->uni2char[i]);
1227
1228 if (vmap->wide2uni)
1229 free(vmap->wide2uni);
1230
1231 free(vmap);
1232 }
1233
1234
1235 /*
1236 * 'get_charmap()' - Lookup or get a character set map (private).
1237 *
1238 * This code handles single-byte (SBCS), double-byte (DBCS), and
1239 * variable-byte (VBCS) character sets _without_ charset escapes...
1240 * This code does not handle multiple-byte character sets (MBCS)
1241 * (such as ISO-2022-JP) with charset switching via escapes...
1242 */
1243
1244
1245 static void * /* O - Charset map pointer */
1246 get_charmap(
1247 const cups_encoding_t encoding) /* I - Encoding */
1248 {
1249 char filename[1024]; /* Filename for charset map file */
1250 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
1251
1252
1253 DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
1254
1255 /*
1256 * Get the data directory and charset map name...
1257 */
1258
1259 snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
1260 cg->cups_datadir, _cupsEncodingName(encoding));
1261
1262 DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
1263
1264 /*
1265 * Read charset map input file into cache...
1266 */
1267
1268 if (encoding < CUPS_ENCODING_SBCS_END)
1269 return (get_sbcs_charmap(encoding, filename));
1270 else if (encoding < CUPS_ENCODING_VBCS_END)
1271 return (get_vbcs_charmap(encoding, filename));
1272 else
1273 return (NULL);
1274 }
1275
1276
1277 /*
1278 * 'get_charmap_count()' - Count lines in a charmap file.
1279 */
1280
1281 static int /* O - Count or -1 on error */
1282 get_charmap_count(cups_file_t *fp) /* I - File to read from */
1283 {
1284 int count; /* Number of lines */
1285 char line[256]; /* Line from input map file */
1286
1287
1288 /*
1289 * Count lines in map input file...
1290 */
1291
1292 count = 0;
1293
1294 while (cupsFileGets(fp, line, sizeof(line)))
1295 if (line[0] == '0')
1296 count ++;
1297
1298 /*
1299 * Return the number of lines...
1300 */
1301
1302 if (count > 0)
1303 return (count);
1304 else
1305 return (-1);
1306 }
1307
1308
1309 /*
1310 * 'get_sbcs_charmap()' - Get SBCS Charmap.
1311 */
1312
1313 static _cups_cmap_t * /* O - Charmap or 0 on error */
1314 get_sbcs_charmap(
1315 const cups_encoding_t encoding, /* I - Charmap Encoding */
1316 const char *filename) /* I - Charmap Filename */
1317 {
1318 unsigned long legchar; /* Legacy character value */
1319 cups_utf32_t unichar; /* Unicode character value */
1320 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1321 cups_file_t *fp; /* Charset map file pointer */
1322 char *s; /* Line parsing pointer */
1323 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1324 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1325 char line[256]; /* Line from charset map file */
1326
1327
1328 /*
1329 * See if we already have this SBCS charset map loaded...
1330 */
1331
1332 DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
1333 filename));
1334
1335 for (cmap = cmap_cache; cmap; cmap = cmap->next)
1336 {
1337 if (cmap->encoding == encoding)
1338 {
1339 cmap->used ++;
1340 DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
1341
1342 return ((void *)cmap);
1343 }
1344 }
1345
1346 /*
1347 * Open SBCS charset map input file...
1348 */
1349
1350 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1351 {
1352 DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
1353
1354 return (NULL);
1355 }
1356
1357 /*
1358 * Allocate memory for SBCS charset map...
1359 */
1360
1361 if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
1362 {
1363 cupsFileClose(fp);
1364 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
1365
1366 return (NULL);
1367 }
1368
1369 cmap->used ++;
1370 cmap->encoding = encoding;
1371
1372 /*
1373 * Save SBCS charset map into memory for transcoding...
1374 */
1375
1376 while (cupsFileGets(fp, line, sizeof(line)))
1377 {
1378 if (line[0] != '0')
1379 continue;
1380
1381 legchar = strtol(line, &s, 16);
1382 if (legchar < 0 || legchar > 0xff)
1383 goto sbcs_error;
1384
1385 unichar = strtol(s, NULL, 16);
1386 if (unichar < 0 || unichar > 0x10ffff)
1387 goto sbcs_error;
1388
1389 /*
1390 * Save legacy to Unicode mapping in direct lookup table...
1391 */
1392
1393 crow = cmap->char2uni + legchar;
1394 *crow = (cups_ucs2_t)(unichar & 0xffff);
1395
1396 /*
1397 * Save Unicode to legacy mapping in indirect lookup table...
1398 */
1399
1400 srow = cmap->uni2char[(unichar >> 8) & 0xff];
1401 if (!srow)
1402 {
1403 srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
1404 if (!srow)
1405 goto sbcs_error;
1406
1407 cmap->uni2char[(unichar >> 8) & 0xff] = srow;
1408 }
1409
1410 srow += unichar & 0xff;
1411
1412 /*
1413 * Convert Replacement Character to visible replacement...
1414 */
1415
1416 if (unichar == 0xfffd)
1417 legchar = (unsigned long)'?';
1418
1419 /*
1420 * First (oldest) legacy character uses Unicode mapping cell...
1421 */
1422
1423 if (!*srow)
1424 *srow = (cups_sbcs_t)legchar;
1425 }
1426
1427 cupsFileClose(fp);
1428
1429 /*
1430 * Add it to the cache and return...
1431 */
1432
1433 cmap->next = cmap_cache;
1434 cmap_cache = cmap;
1435
1436 DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
1437
1438 return (cmap);
1439
1440 /*
1441 * If we get here, there was an error in the cmap file...
1442 */
1443
1444 sbcs_error:
1445
1446 free_sbcs_charmap(cmap);
1447
1448 cupsFileClose(fp);
1449
1450 DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
1451
1452 return (NULL);
1453 }
1454
1455
1456 /*
1457 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1458 */
1459
1460 static _cups_vmap_t * /* O - Charmap or 0 on error */
1461 get_vbcs_charmap(
1462 const cups_encoding_t encoding, /* I - Charmap Encoding */
1463 const char *filename) /* I - Charmap Filename */
1464 {
1465 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1466 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1467 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1468 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1469 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1470 unsigned long legchar; /* Legacy character value */
1471 cups_utf32_t unichar; /* Unicode character value */
1472 int mapcount; /* Count of lines in charmap file */
1473 cups_file_t *fp; /* Charset map file pointer */
1474 char *s; /* Line parsing pointer */
1475 char line[256]; /* Line from charset map file */
1476 int i; /* Loop variable */
1477 int legacy; /* 32-bit legacy char */
1478
1479
1480 DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1481 encoding, filename));
1482
1483 /*
1484 * See if we already have this DBCS/VBCS charset map loaded...
1485 */
1486
1487 for (vmap = vmap_cache; vmap; vmap = vmap->next)
1488 {
1489 if (vmap->encoding == encoding)
1490 {
1491 vmap->used ++;
1492 DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
1493
1494 return ((void *)vmap);
1495 }
1496 }
1497
1498 /*
1499 * Open VBCS charset map input file...
1500 */
1501
1502 if ((fp = cupsFileOpen(filename, "r")) == NULL)
1503 {
1504 DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
1505
1506 return (NULL);
1507 }
1508
1509 /*
1510 * Count lines in charmap file...
1511 */
1512
1513 if ((mapcount = get_charmap_count(fp)) <= 0)
1514 {
1515 DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
1516
1517 cupsFileClose(fp);
1518
1519 return (NULL);
1520 }
1521
1522 DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
1523
1524 /*
1525 * Allocate memory for DBCS/VBCS charset map...
1526 */
1527
1528 if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
1529 {
1530 DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
1531
1532 cupsFileClose(fp);
1533
1534 return (NULL);
1535 }
1536
1537 vmap->used ++;
1538 vmap->encoding = encoding;
1539
1540 /*
1541 * Save DBCS/VBCS charset map into memory for transcoding...
1542 */
1543
1544 wide2uni = NULL;
1545
1546 cupsFileRewind(fp);
1547
1548 i = 0;
1549 legacy = 0;
1550
1551 while (cupsFileGets(fp, line, sizeof(line)))
1552 {
1553 if (line[0] != '0')
1554 continue;
1555
1556 legchar = strtoul(line, &s, 16);
1557 if (legchar == ULONG_MAX)
1558 goto vbcs_error;
1559
1560 unichar = strtol(s, NULL, 16);
1561 if (unichar < 0 || unichar > 0x10ffff)
1562 goto vbcs_error;
1563
1564 i ++;
1565
1566 DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
1567 legchar, (unsigned)unichar));
1568
1569 /*
1570 * Save lead char of 2/3/4-byte legacy char...
1571 */
1572
1573 if (legchar > 0xffffff)
1574 {
1575 leadchar = (cups_sbcs_t)(legchar >> 24);
1576 vmap->lead4char[leadchar] = leadchar;
1577 }
1578 else if (legchar > 0xffff)
1579 {
1580 leadchar = (cups_sbcs_t)(legchar >> 16);
1581 vmap->lead3char[leadchar] = leadchar;
1582 }
1583 else
1584 {
1585 leadchar = (cups_sbcs_t)(legchar >> 8);
1586 vmap->lead2char[leadchar] = leadchar;
1587 }
1588
1589 /*
1590 * Save Legacy to Unicode mapping...
1591 */
1592
1593 if (legchar <= 0xffff)
1594 {
1595 /*
1596 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1597 */
1598
1599 crow = vmap->char2uni[(int)leadchar];
1600 if (!crow)
1601 {
1602 crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
1603 if (!crow)
1604 goto vbcs_error;
1605
1606 vmap->char2uni[(int)leadchar] = crow;
1607 }
1608
1609 crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
1610 }
1611 else
1612 {
1613 /*
1614 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1615 */
1616
1617 if (!legacy)
1618 {
1619 legacy = 1;
1620 vmap->widecount = (mapcount - i + 1);
1621 wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
1622 sizeof(_cups_wide2uni_t));
1623 if (!wide2uni)
1624 goto vbcs_error;
1625
1626 vmap->wide2uni = wide2uni;
1627 }
1628
1629 wide2uni->widechar = (cups_vbcs_t)legchar;
1630 wide2uni->unichar = (cups_ucs2_t)unichar;
1631 wide2uni ++;
1632 }
1633
1634 /*
1635 * Save Unicode to legacy mapping in indirect lookup table...
1636 */
1637
1638 vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
1639 if (!vrow)
1640 {
1641 vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
1642 if (!vrow)
1643 goto vbcs_error;
1644
1645 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1646 }
1647
1648 vrow += (int)(unichar & 0xff);
1649
1650 /*
1651 * Convert Replacement Character to visible replacement...
1652 */
1653
1654 if (unichar == 0xfffd)
1655 legchar = (unsigned long)'?';
1656
1657 /*
1658 * First (oldest) legacy character uses Unicode mapping cell...
1659 */
1660
1661 if (!*vrow)
1662 *vrow = (cups_vbcs_t)legchar;
1663 }
1664
1665 vmap->charcount = (i - vmap->widecount);
1666
1667 cupsFileClose(fp);
1668
1669 /*
1670 * Add it to the cache and return...
1671 */
1672
1673 vmap->next = vmap_cache;
1674 vmap_cache = vmap;
1675
1676 DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
1677
1678 return (vmap);
1679
1680 /*
1681 * If we get here, the file contains errors...
1682 */
1683
1684 vbcs_error:
1685
1686 free_vbcs_charmap(vmap);
1687
1688 cupsFileClose(fp);
1689
1690 DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
1691
1692 return (NULL);
1693 }
1694
1695
1696 /*
1697 * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
1698 */