]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
To prepare to load cups into easysw/current, perform 4 renames.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
2 * "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $"
3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
45 */
46
47/*
48 * Include necessary headers...
49 */
50
51#include "globals.h"
52#include <stdlib.h>
53#include <errno.h>
54#include <time.h>
55
56
57/*
58 * Prototypes...
59 */
60
61static int get_charmap_count(const char *filename);
62static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
63 const char *filename);
64static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
65 const char *filename);
66
67static int conv_utf8_to_sbcs(char *dest,
68 const cups_utf8_t *src,
69 const int maxout,
70 const cups_encoding_t encoding);
71static int conv_utf8_to_vbcs(char *dest,
72 const cups_utf8_t *src,
73 const int maxout,
74 const cups_encoding_t encoding);
75
76static int conv_sbcs_to_utf8(cups_utf8_t *dest,
77 const char *src,
78 const int maxout,
79 const cups_encoding_t encoding);
80static int conv_vbcs_to_utf8(cups_utf8_t *dest,
81 const char *src,
82 const int maxout,
83 const cups_encoding_t encoding);
84
85static int compare_wide(const void *k1, const void *k2);
86
87/*
88 * 'cupsCharmapGet()' - Get a character set map.
89 *
90 * This code handles single-byte (SBCS), double-byte (DBCS), and
91 * variable-byte (VBCS) character sets _without_ charset escapes...
92 * This code does not handle multiple-byte character sets (MBCS)
93 * (such as ISO-2022-JP) with charset switching via escapes...
94 */
95
96void * /* O - Charset map pointer */
97cupsCharmapGet(
98 const cups_encoding_t encoding) /* I - Encoding */
99{
100 char mapname[80]; /* Name of charset map */
101 char filename[1024]; /* Filename for charset map file */
102 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
103
104
105 /*
106 * Check for valid arguments...
107 */
108
109 if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
110 return (NULL);
111
112 /*
113 * Get the data directory and charset map name...
114 */
115
116 snprintf(mapname, sizeof(mapname), "%s.txt", _cupsEncodingName(encoding));
117 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
118 cg->cups_datadir, mapname);
119
120 /*
121 * Read charset map input file into cache...
122 */
123
124 if (encoding < CUPS_ENCODING_SBCS_END)
125 return (get_sbcs_charmap(encoding, filename));
126 else if (encoding < CUPS_ENCODING_VBCS_END)
127 return (get_vbcs_charmap(encoding, filename));
128 else
129 return (NULL);
130}
131
132/*
133 * 'cupsCharmapFree()' - Free a character set map.
134 *
135 * This does not actually free; use 'cupsCharmapFlush()' for that.
136 */
137void
138cupsCharmapFree(const cups_encoding_t encoding)
139 /* I - Encoding */
140{
141 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
142 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
143 _cups_globals_t *cg = _cupsGlobals();
144 /* Pointer to library globals */
145
146 /*
147 * See if we already have this SBCS charset map loaded...
148 */
149 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
150 {
151 if (cmap->encoding == encoding)
152 {
153 if (cmap->used > 0)
154 cmap->used --;
155 return;
156 }
157 }
158
159 /*
160 * See if we already have this DBCS/VBCS charset map loaded...
161 */
162 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
163 {
164 if (vmap->encoding == encoding)
165 {
166 if (vmap->used > 0)
167 vmap->used --;
168 return;
169 }
170 }
171 return;
172}
173
174/*
175 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
176 */
177void
178cupsCharmapFlush(void)
179{
180 int i; /* Looping variable */
181 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
182 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
183 _cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */
184 _cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */
185 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
186 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
187 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
188 _cups_globals_t *cg = _cupsGlobals();
189 /* Pointer to library globals */
190
191 /*
192 * Loop through SBCS charset map cache, free all memory...
193 */
194 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext)
195 {
196 for (i = 0; i < 256; i ++)
197 {
198 if ((srow = cmap->uni2char[i]) != NULL)
199 free(srow);
200 }
201 cnext = cmap->next;
202 free(cmap);
203 }
204 cg->cmap_cache = NULL;
205
206 /*
207 * Loop through DBCS/VBCS charset map cache, free all memory...
208 */
209 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext)
210 {
211 for (i = 0; i < 256; i ++)
212 {
213 if ((crow = vmap->char2uni[i]) != NULL)
214 free(crow);
215 }
216 for (i = 0; i < 256; i ++)
217 {
218 if ((vrow = vmap->uni2char[i]) != NULL)
219 free(vrow);
220 }
221 if (vmap->wide2uni)
222 free(vmap->wide2uni);
223 vnext = vmap->next;
224 free(vmap);
225 }
226 cg->vmap_cache = NULL;
227 return;
228}
229
230/*
231 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
232 *
233 * This code handles single-byte (SBCS), double-byte (DBCS), and
234 * variable-byte (VBCS) character sets _without_ charset escapes...
235 * This code does not handle multiple-byte character sets (MBCS)
236 * (such as ISO-2022-JP) with charset switching via escapes...
237 */
238int /* O - Count or -1 on error */
239cupsUTF8ToCharset(char *dest, /* O - Target string */
240 const cups_utf8_t *src, /* I - Source string */
241 const int maxout, /* I - Max output */
242 const cups_encoding_t encoding) /* I - Encoding */
243{
244 /*
245 * Check for valid arguments...
246 */
247
248 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
249 return (-1);
250
251 /*
252 * Handle identity conversions...
253 */
254
255 if (encoding == CUPS_UTF8 ||
256 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
257 {
258 strlcpy(dest, (char *)src, maxout);
259 return (strlen(dest));
260 }
261
262 /*
263 * Convert input UTF-8 to legacy charset...
264 */
265 if (encoding < CUPS_ENCODING_SBCS_END)
266 return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
267 else if (encoding < CUPS_ENCODING_VBCS_END)
268 return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
269 else
270 return (-1);
271}
272
273/*
274 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
275 *
276 * This code handles single-byte (SBCS), double-byte (DBCS), and
277 * variable-byte (VBCS) character sets _without_ charset escapes...
278 * This code does not handle multiple-byte character sets (MBCS)
279 * (such as ISO-2022-JP) with charset switching via escapes...
280 */
281int /* O - Count or -1 on error */
282cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */
283 const char *src, /* I - Source string */
284 const int maxout, /* I - Max output */
285 const cups_encoding_t encoding) /* I - Encoding */
286{
287 /*
288 * Check for valid arguments...
289 */
290
291 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
292 return (-1);
293
294 /*
295 * Handle identity conversions...
296 */
297
298 if (encoding == CUPS_UTF8 ||
299 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
300 {
301 strlcpy((char *)dest, src, maxout);
302 return (strlen((char *)dest));
303 }
304
305 /*
306 * Convert input legacy charset to UTF-8...
307 */
308 if (encoding < CUPS_ENCODING_SBCS_END)
309 return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
310 else if (encoding < CUPS_ENCODING_VBCS_END)
311 return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
312 else
313 return (-1);
314}
315
316/*
317 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
318 *
319 * This code does not support Unicode beyond 16-bits (Plane 0)...
320 */
321int /* O - Count or -1 on error */
322cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */
323 const cups_utf8_t *src, /* I - Source string */
324 const int maxout) /* I - Max output */
325{
326 int worklen; /* Internal UCS-4 string length */
327 cups_utf32_t work[CUPS_MAX_USTRING];
328 /* Internal UCS-4 string */
329
330 /*
331 * Check for valid arguments and clear output...
332 */
333 if ((dest == NULL)
334 || (src == NULL)
335 || (maxout < 1)
336 || (maxout > CUPS_MAX_USTRING))
337 return (-1);
338 *dest = 0;
339
340 /*
341 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
342 */
343 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
344 if (worklen < 0)
345 return (-1);
346
347 /*
348 * Convert internal UCS-4 to output UTF-16...
349 */
350 worklen = cupsUTF32ToUTF16(dest, work, maxout);
351 return (worklen);
352}
353
354/*
355 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
356 *
357 * This code does not support Unicode beyond 16-bits (Plane 0)...
358 */
359int /* O - Count or -1 on error */
360cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */
361 const cups_utf16_t *src, /* I - Source string */
362 const int maxout) /* I - Max output */
363{
364 int worklen; /* Internal UCS-4 string length */
365 cups_utf32_t work[CUPS_MAX_USTRING];
366 /* Internal UCS-4 string */
367
368 /*
369 * Check for valid arguments and clear output...
370 */
371 if ((dest == NULL)
372 || (src == NULL)
373 || (maxout < 1)
374 || (maxout > CUPS_MAX_USTRING))
375 return (-1);
376 *dest = 0;
377
378 /*
379 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
380 */
381 worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
382 if (worklen < 0)
383 return (-1);
384
385 /*
386 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
387 */
388 worklen = cupsUTF32ToUTF8(dest, work, maxout);
389 return (worklen);
390}
391
392/*
393 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
394 *
395 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
396 *
397 * UTF-32 char UTF-8 char(s)
398 * --------------------------------------------------
399 * 0 to 127 = 0xxxxxxx (US-ASCII)
400 * 128 to 2047 = 110xxxxx 10yyyyyy
401 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
402 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
403 *
404 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
405 * which would convert to five- or six-octet UTF-8 sequences...
406 *
407 * This code does not support Unicode beyond 16-bits (Plane 0)...
408 */
409int /* O - Count or -1 on error */
410cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */
411 const cups_utf8_t *src, /* I - Source string */
412 const int maxout) /* I - Max output */
413{
414 cups_utf8_t *first = (cups_utf8_t *) src;
415 size_t srclen; /* Source string length */
416 int i; /* Looping variable */
417 cups_utf32_t ch; /* Character value */
418 cups_utf32_t next; /* Next character value */
419 cups_utf32_t ch32; /* UTF-32 character value */
420
421 /*
422 * Check for valid arguments and clear output...
423 */
424 if ((dest == NULL)
425 || (src == NULL)
426 || (maxout < 1)
427 || (maxout > CUPS_MAX_USTRING))
428 return (-1);
429 *dest = 0;
430
431 /*
432 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
433 */
434 *dest = 0xfeff;
435 dest ++;
436 srclen = strlen((char *) src);
437 for (i = 1; i < (maxout - 1); src ++, dest ++)
438 {
439 ch = (cups_utf32_t) *src;
440 ch &= 0xff;
441 if (ch == 0)
442 break;
443 i ++;
444
445 /*
446 * Convert UTF-8 character(s) to UTF-32 character...
447 */
448 if ((ch & 0x7f) == ch)
449 {
450 /*
451 * One-octet UTF-8 <= 127 (US-ASCII)...
452 */
453 *dest = ch;
454 }
455 else if ((ch & 0xe0) == 0xc0)
456 {
457 /*
458 * Two-octet UTF-8 <= 2047 (Latin-x)...
459 */
460 src ++;
461 next = (cups_utf32_t) *src;
462 next &= 0xff;
463 if (next == 0)
464 return (-1);
465 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
466
467 /*
468 * Check for non-shortest form (invalid UTF-8)...
469 */
470 if (ch32 <= 127)
471 return (-1);
472 *dest = ch32;
473 }
474 else if ((ch & 0xf0) == 0xe0)
475 {
476 /*
477 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
478 */
479 src ++;
480 next = (cups_utf32_t) *src;
481 next &= 0xff;
482 if (next == 0)
483 return (-1);
484 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
485 src ++;
486 next = (cups_utf32_t) *src;
487 next &= 0xff;
488 if (next == 0)
489 return (-1);
490 ch32 = ((ch32 << 6) | (next & 0x3f));
491
492 /*
493 * Check for non-shortest form (invalid UTF-8)...
494 */
495 if (ch32 <= 2047)
496 return (-1);
497 *dest = ch32;
498 }
499 else if ((ch & 0xf8) == 0xf0)
500 {
501 /*
502 * Four-octet UTF-8 to Replacement Character...
503 */
504 if (((src - first) + 3) >= srclen)
505 return (-1);
506 src += 3;
507 *dest = 0xfffd;
508 }
509 else if ((ch & 0xfc) == 0xf8)
510 {
511 /*
512 * Five-octet UTF-8 (invalid strict UTF-32)...
513 */
514 return (-1);
515 }
516 else if ((ch & 0xfe) == 0xfc)
517 {
518 /*
519 * Six-octet UTF-8 (invalid strict UTF-32)...
520 */
521 return (-1);
522 }
523 else
524 {
525 /*
526 * More than six-octet (invalid UTF-8 sequence)...
527 */
528 return (-1);
529 }
530
531 /*
532 * Check for UTF-16 surrogate (illegal UTF-8)...
533 */
534 if ((*dest >= 0xd800) && (*dest <= 0xdfff))
535 return (-1);
536
537 /*
538 * Check for beyond Plane 16 (invalid UTF-8)...
539 */
540 if (*dest > 0x10ffff)
541 return (-1);
542 }
543 *dest = 0;
544 return (i);
545}
546
547/*
548 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
549 *
550 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
551 *
552 * UTF-32 char UTF-8 char(s)
553 * --------------------------------------------------
554 * 0 to 127 = 0xxxxxxx (US-ASCII)
555 * 128 to 2047 = 110xxxxx 10yyyyyy
556 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
557 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
558 *
559 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
560 * which would convert to five- or six-octet UTF-8 sequences...
561 *
562 * This code does not support Unicode beyond 16-bits (Plane 0)...
563 */
564int /* O - Count or -1 on error */
565cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */
566 const cups_utf32_t *src, /* I - Source string */
567 const int maxout) /* I - Max output */
568{
569 cups_utf32_t *first = (cups_utf32_t *) src;
570 /* First source char */
571 cups_utf8_t *start = dest; /* Start of destination string */
572 int i; /* Looping variable */
573 int swap = 0; /* Byte-swap input to output */
574 cups_utf32_t ch; /* Character value */
575
576 /*
577 * Check for valid arguments and clear output...
578 */
579 if ((dest == NULL)
580 || (src == NULL)
581 || (maxout < 1))
582 return (-1);
583 *dest = '\0';
584
585 /*
586 * Check for leading BOM in UTF-32 and inverted BOM...
587 */
588 if (*src == 0xfffe0000)
589 swap = 1;
590
591 /*
592 * Convert input UTF-32 to output UTF-8...
593 */
594 for (i = 0; i < (maxout - 1); src ++)
595 {
596 ch = *src;
597 if (ch == 0)
598 break;
599
600 /*
601 * Byte swap input UTF-32, if necessary...
602 */
603 if (swap)
604 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
605
606 /*
607 * Check for leading BOM (and delete from output)...
608 */
609 if ((src == first) && (ch == 0xfeff))
610 continue;
611
612 /*
613 * Check for beyond Plane 16 (invalid UTF-32)...
614 */
615 if (ch > 0x10ffff)
616 return (-1);
617
618 /*
619 * Convert beyond Plane 0 (BMP) to Replacement Character...
620 */
621 if (ch > 0xffff)
622 ch = 0xfffd;
623
624 /*
625 * Convert UTF-32 character to UTF-8 character(s)...
626 */
627 if (ch <= 0x7f)
628 {
629 /*
630 * One-octet UTF-8 <= 127 (US-ASCII)...
631 */
632 *dest = (cups_utf8_t) ch;
633 dest ++;
634 i ++;
635 }
636 else if (ch <= 0x7ff)
637 {
638 /*
639 * Two-octet UTF-8 <= 2047 (Latin-x)...
640 */
641 if (i > (maxout - 2))
642 break;
643 *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
644 dest ++;
645 i ++;
646 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
647 dest ++;
648 i ++;
649 }
650 else
651 {
652 /*
653 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
654 */
655 if (i > (maxout - 3))
656 break;
657 *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
658 dest ++;
659 i ++;
660 *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
661 dest ++;
662 i ++;
663 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
664 dest ++;
665 i ++;
666 }
667 }
668 *dest = '\0';
669 i = (int) (dest - start);
670 return (i);
671}
672
673/*
674 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
675 *
676 * This code does not support Unicode beyond 16-bits (Plane 0)...
677 */
678int /* O - Count or -1 on error */
679cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */
680 const cups_utf16_t *src, /* I - Source string */
681 const int maxout) /* I - Max output */
682{
683 int i; /* Looping variable */
684 int swap = 0; /* Byte-swap input to output */
685 int surrogate = 0; /* Expecting low-half surrogate */
686 cups_utf32_t ch; /* Character value */
687
688 /*
689 * Check for valid arguments and clear output...
690 */
691 if ((dest == NULL)
692 || (src == NULL)
693 || (maxout < 1)
694 || (maxout > CUPS_MAX_USTRING))
695 return (-1);
696 *dest = 0;
697
698 /*
699 * Check for leading BOM in UTF-16 and inverted BOM...
700 */
701 if (*src == 0xfffe)
702 swap = 1;
703
704 /*
705 * Convert input UTF-16 to output UTF-32...
706 */
707 for (i = 0; i < (maxout - 1); src ++)
708 {
709 ch = (cups_utf32_t) (*src & 0xffff);
710 if (ch == 0)
711 break;
712 i ++;
713
714 /*
715 * Byte swap input UTF-16, if necessary...
716 */
717 if (swap)
718 ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
719
720 /*
721 * Discard expected UTF-16 low-half surrogate...
722 */
723 if ((ch >= 0xdc00) && (ch <= 0xdfff))
724 {
725 if (surrogate == 0)
726 return (-1);
727 surrogate = 0;
728 continue;
729 }
730
731 /*
732 * Convert UTF-16 high-half surrogate to Replacement Character...
733 */
734 if ((ch >= 0xd800) && (ch <= 0xdbff))
735 {
736 if (surrogate == 1)
737 return (-1);
738 surrogate = 1;
739 ch = 0xfffd;
740 }
741 *dest = ch;
742 dest ++;
743 }
744 *dest = 0;
745 return (i);
746}
747
748/*
749 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
750 *
751 * This code does not support Unicode beyond 16-bits (Plane 0)...
752 */
753int /* O - Count or -1 on error */
754cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */
755 const cups_utf32_t *src, /* I - Source string */
756 const int maxout) /* I - Max output */
757{
758 int i; /* Looping variable */
759 int swap = 0; /* Byte-swap input to output */
760 cups_utf32_t ch; /* Character value */
761
762 /*
763 * Check for valid arguments and clear output...
764 */
765 if ((dest == NULL)
766 || (src == NULL)
767 || (maxout < 1)
768 || (maxout > CUPS_MAX_USTRING))
769 return (-1);
770 *dest = 0;
771
772 /*
773 * Check for leading BOM in UTF-32 and inverted BOM...
774 */
775 if (*src == 0xfffe0000)
776 swap = 1;
777
778 /*
779 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
780 */
781 for (i = 0; i < (maxout - 1); src ++, dest ++)
782 {
783 ch = *src;
784 if (ch == 0)
785 break;
786 i ++;
787
788 /*
789 * Byte swap input UTF-32, if necessary...
790 */
791 if (swap)
792 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
793
794 /*
795 * Check for UTF-16 surrogate (illegal UTF-32)...
796 */
797 if ((ch >= 0xd800) && (ch <= 0xdfff))
798 return (-1);
799
800 /*
801 * Check for beyond Plane 16 (invalid UTF-32)...
802 */
803 if (ch > 0x10ffff)
804 return (-1);
805
806 /*
807 * Convert beyond Plane 0 (BMP) to Replacement Character...
808 */
809 if (ch > 0xffff)
810 ch = 0xfffd;
811 *dest = (cups_utf16_t) ch;
812 }
813 *dest = 0;
814 return (i);
815}
816
817/*
818 * 'get_charmap_count()' - Count lines in a charmap file.
819 */
820static int /* O - Count or -1 on error */
821get_charmap_count(const char *filename) /* I - Charmap Filename */
822{
823 int i; /* Looping variable */
824 cups_file_t *fp; /* Map input file pointer */
825 char *s; /* Line parsing pointer */
826 char line[256]; /* Line from input map file */
827 cups_utf32_t unichar; /* Unicode character value */
828
829 /*
830 * Open map input file...
831 */
832 if ((filename == NULL) || (*filename == '\0'))
833 return (-1);
834 fp = cupsFileOpen(filename, "r");
835 if (fp == NULL)
836 return (-1);
837
838 /*
839 * Count lines in map input file...
840 */
841 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
842 {
843 s = cupsFileGets(fp, line, sizeof(line));
844 if (s == NULL)
845 break;
846 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
847 continue;
848 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
849 s ++;
850 while ((*s == ' ') || (*s == '\t'))
851 s ++;
852 if (strncmp (s, "0x", 2) == 0)
853 s += 2;
854 if ((sscanf(s, "%lx", &unichar) != 1)
855 || (unichar > 0xffff))
856 {
857 cupsFileClose(fp);
858 return (-1);
859 }
860 i ++;
861 }
862 if (i == 0)
863 i = -1;
864
865 /*
866 * Close file and return charmap count (non-comment line count)...
867 */
868 cupsFileClose(fp);
869 return (i);
870}
871
872/*
873 * 'get_sbcs_charmap()' - Get SBCS Charmap.
874 */
875static _cups_cmap_t * /* O - Charmap or 0 on error */
876get_sbcs_charmap(const cups_encoding_t encoding,
877 /* I - Charmap Encoding */
878 const char *filename) /* I - Charmap Filename */
879{
880 int i; /* Loop variable */
881 unsigned long legchar; /* Legacy character value */
882 cups_utf32_t unichar; /* Unicode character value */
883 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
884 cups_file_t *fp; /* Charset map file pointer */
885 char *s; /* Line parsing pointer */
886 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
887 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
888 char line[256]; /* Line from charset map file */
889 _cups_globals_t *cg = _cupsGlobals();
890 /* Pointer to library globals */
891
892 /*
893 * Check for valid arguments...
894 */
895 if ((encoding < 0) || (filename == NULL))
896 return (NULL);
897
898 /*
899 * See if we already have this SBCS charset map loaded...
900 */
901 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
902 {
903 if (cmap->encoding == encoding)
904 {
905 cmap->used ++;
906 return ((void *) cmap);
907 }
908 }
909
910 /*
911 * Open SBCS charset map input file...
912 */
913 fp = cupsFileOpen(filename, "r");
914 if (fp == NULL)
915 return (NULL);
916
917 /*
918 * Allocate memory for SBCS charset map and add to cache...
919 */
920 cmap = (_cups_cmap_t *) calloc(1, sizeof(_cups_cmap_t));
921 if (cmap == NULL)
922 {
923 cupsFileClose(fp);
924 return (NULL);
925 }
926 cmap->next = cg->cmap_cache;
927 cg->cmap_cache = cmap;
928 cmap->used ++;
929 cmap->encoding = encoding;
930
931 /*
932 * Save SBCS charset map into memory for transcoding...
933 */
934 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
935 {
936 s = cupsFileGets(fp, line, sizeof(line));
937 if (s == NULL)
938 break;
939 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
940 continue;
941 if (strncmp (s, "0x", 2) == 0)
942 s += 2;
943 if ((sscanf(s, "%lx", &legchar) != 1)
944 || (legchar > 0xff))
945 {
946 cupsFileClose(fp);
947 cupsCharmapFlush();
948 return (NULL);
949 }
950 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
951 s ++;
952 while ((*s == ' ') || (*s == '\t'))
953 s ++;
954 if (strncmp (s, "0x", 2) == 0)
955 s += 2;
956 if (sscanf(s, "%lx", &unichar) != 1)
957 {
958 cupsFileClose(fp);
959 cupsCharmapFlush();
960 return (NULL);
961 }
962 i ++;
963
964 /*
965 * Convert beyond Plane 0 (BMP) to Replacement Character...
966 */
967 if (unichar > 0xffff)
968 unichar = 0xfffd;
969
970 /*
971 * Save legacy to Unicode mapping in direct lookup table...
972 */
973 crow = &cmap->char2uni[(int) legchar];
974 *crow = (cups_ucs2_t) (unichar & 0xffff);
975
976 /*
977 * Save Unicode to legacy mapping in indirect lookup table...
978 */
979 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
980 if (srow == NULL)
981 {
982 srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
983 if (srow == NULL)
984 {
985 cupsFileClose(fp);
986 cupsCharmapFlush();
987 return (NULL);
988 }
989 cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
990 }
991 srow += (int) (unichar & 0xff);
992
993 /*
994 * Convert Replacement Character to visible replacement...
995 */
996 if (unichar == 0xfffd)
997 legchar = (unsigned long) '?';
998
999 /*
1000 * First (oldest) legacy character uses Unicode mapping cell...
1001 */
1002 if (*srow == 0)
1003 *srow = (cups_sbcs_t) legchar;
1004 }
1005 cupsFileClose(fp);
1006 return (cmap);
1007}
1008
1009/*
1010 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1011 */
1012static _cups_vmap_t * /* O - Charmap or 0 on error */
1013get_vbcs_charmap(const cups_encoding_t encoding,
1014 /* I - Charmap Encoding */
1015 const char *filename) /* I - Charmap Filename */
1016{
1017 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1018 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1019 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1020 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1021 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1022 unsigned long legchar; /* Legacy character value */
1023 cups_utf32_t unichar; /* Unicode character value */
1024 int mapcount; /* Count of lines in charmap file */
1025 cups_file_t *fp; /* Charset map file pointer */
1026 char *s; /* Line parsing pointer */
1027 char line[256]; /* Line from charset map file */
1028 int i; /* Loop variable */
1029 int wide; /* 32-bit legacy char */
1030 _cups_globals_t *cg = _cupsGlobals();
1031 /* Pointer to library globals */
1032
1033 /*
1034 * Check for valid arguments...
1035 */
1036 if ((encoding < 0) || (filename == NULL))
1037 return (NULL);
1038
1039 /*
1040 * See if we already have this DBCS/VBCS charset map loaded...
1041 */
1042 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
1043 {
1044 if (vmap->encoding == encoding)
1045 {
1046 vmap->used ++;
1047 return ((void *) vmap);
1048 }
1049 }
1050
1051 /*
1052 * Count lines in charmap file...
1053 */
1054 mapcount = get_charmap_count(filename);
1055 if (mapcount <= 0)
1056 return (NULL);
1057
1058 /*
1059 * Open VBCS charset map input file...
1060 */
1061 fp = cupsFileOpen(filename, "r");
1062 if (fp == NULL)
1063 return (NULL);
1064
1065 /*
1066 * Allocate memory for DBCS/VBCS charset map and add to cache...
1067 */
1068 vmap = (_cups_vmap_t *) calloc(1, sizeof(_cups_vmap_t));
1069 if (vmap == NULL)
1070 {
1071 cupsFileClose(fp);
1072 return (NULL);
1073 }
1074 vmap->next = cg->vmap_cache;
1075 cg->vmap_cache = vmap;
1076 vmap->used ++;
1077 vmap->encoding = encoding;
1078
1079 /*
1080 * Save DBCS/VBCS charset map into memory for transcoding...
1081 */
1082 leadchar = 0;
1083 wide2uni = NULL;
1084
1085 for (i = 0, wide = 0; i < mapcount; )
1086 {
1087 s = cupsFileGets(fp, line, sizeof(line));
1088 if (s == NULL)
1089 break;
1090 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1091 continue;
1092 if (strncmp (s, "0x", 2) == 0)
1093 s += 2;
1094 if ((sscanf(s, "%lx", &legchar) != 1)
1095 || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1096 {
1097 cupsFileClose(fp);
1098 cupsCharmapFlush();
1099 return (NULL);
1100 }
1101 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1102 s ++;
1103 while ((*s == ' ') || (*s == '\t'))
1104 s ++;
1105 if (strncmp (s, "0x", 2) == 0)
1106 s += 2;
1107 if (sscanf(s, "%lx", &unichar) != 1)
1108 {
1109 cupsFileClose(fp);
1110 cupsCharmapFlush();
1111 return (NULL);
1112 }
1113 i ++;
1114
1115 /*
1116 * Convert beyond Plane 0 (BMP) to Replacement Character...
1117 */
1118 if (unichar > 0xffff)
1119 unichar = 0xfffd;
1120
1121 /*
1122 * Save lead char of 2/3/4-byte legacy char...
1123 */
1124 if ((legchar > 0xff) && (legchar <= 0xffff))
1125 {
1126 leadchar = (cups_sbcs_t) (legchar >> 8);
1127 vmap->lead2char[leadchar] = leadchar;
1128 }
1129 if ((legchar > 0xffff) && (legchar <= 0xffffff))
1130 {
1131 leadchar = (cups_sbcs_t) (legchar >> 16);
1132 vmap->lead3char[leadchar] = leadchar;
1133 }
1134 if (legchar > 0xffffff)
1135 {
1136 leadchar = (cups_sbcs_t) (legchar >> 24);
1137 vmap->lead4char[leadchar] = leadchar;
1138 }
1139
1140 /*
1141 * Save Legacy to Unicode mapping...
1142 */
1143 if (legchar <= 0xffff)
1144 {
1145 /*
1146 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1147 */
1148 crow = vmap->char2uni[(int) leadchar];
1149 if (crow == NULL)
1150 {
1151 crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1152 if (crow == NULL)
1153 {
1154 cupsFileClose(fp);
1155 cupsCharmapFlush();
1156 return (NULL);
1157 }
1158 vmap->char2uni[(int) leadchar] = crow;
1159 }
1160 crow += (int) (legchar & 0xff);
1161 *crow = (cups_ucs2_t) unichar;
1162 }
1163 else
1164 {
1165 /*
1166 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1167 */
1168 if (wide == 0)
1169 {
1170 wide = 1;
1171 vmap->widecount = (mapcount - i + 1);
1172 wide2uni = (_cups_wide2uni_t *)
1173 calloc(vmap->widecount, sizeof(_cups_wide2uni_t));
1174 if (wide2uni == NULL)
1175 {
1176 cupsFileClose(fp);
1177 cupsCharmapFlush();
1178 return (NULL);
1179 }
1180 vmap->wide2uni = wide2uni;
1181 }
1182 wide2uni->widechar = (cups_vbcs_t) legchar;
1183 wide2uni->unichar = (cups_ucs2_t)unichar;
1184 wide2uni ++;
1185 }
1186
1187 /*
1188 * Save Unicode to legacy mapping in indirect lookup table...
1189 */
1190 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1191 if (vrow == NULL)
1192 {
1193 vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1194 if (vrow == NULL)
1195 {
1196 cupsFileClose(fp);
1197 cupsCharmapFlush();
1198 return (NULL);
1199 }
1200 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1201 }
1202 vrow += (int) (unichar & 0xff);
1203
1204 /*
1205 * Convert Replacement Character to visible replacement...
1206 */
1207 if (unichar == 0xfffd)
1208 legchar = (unsigned long) '?';
1209
1210 /*
1211 * First (oldest) legacy character uses Unicode mapping cell...
1212 */
1213 if (*vrow == 0)
1214 *vrow = (cups_vbcs_t) legchar;
1215 }
1216 vmap->charcount = (i - vmap->widecount);
1217 cupsFileClose(fp);
1218 return (vmap);
1219}
1220
1221/*
1222 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1223 */
1224static int /* O - Count or -1 on error */
1225conv_utf8_to_sbcs(char *dest, /* O - Target string */
1226 const cups_utf8_t *src, /* I - Source string */
1227 const int maxout, /* I - Max output */
1228 const cups_encoding_t encoding) /* I - Encoding */
1229{
1230 char *start = dest; /* Start of destination string */
1231 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1232 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1233 cups_utf32_t unichar; /* Character value */
1234 int worklen; /* Internal UCS-4 string length */
1235 cups_utf32_t work[CUPS_MAX_USTRING];
1236 /* Internal UCS-4 string */
1237 int i; /* Looping variable */
1238
1239 /*
1240 * Check for valid arguments and clear output...
1241 */
1242 if ((dest == NULL)
1243 || (src == NULL)
1244 || (maxout < 1)
1245 || (maxout > CUPS_MAX_USTRING)
1246 || (encoding == CUPS_UTF8))
1247 return (-1);
1248 *dest = '\0';
1249
1250 /*
1251 * Find legacy charset map in cache...
1252 */
1253 cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
1254 if (cmap == NULL)
1255 return (-1);
1256
1257 /*
1258 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1259 */
1260 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1261 if (worklen < 0)
1262 return (-1);
1263
1264 /*
1265 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1266 */
1267 for (i = 0; i < worklen;)
1268 {
1269 unichar = work[i];
1270 if (unichar == 0)
1271 break;
1272 i ++;
1273
1274 /*
1275 * Check for leading BOM (and delete from output)...
1276 */
1277 if ((i == 1) && (unichar == 0xfeff))
1278 continue;
1279
1280 /*
1281 * Convert ASCII verbatim (optimization)...
1282 */
1283 if (unichar <= 0x7f)
1284 {
1285 *dest = (char) unichar;
1286 dest ++;
1287 continue;
1288 }
1289
1290 /*
1291 * Convert unknown character to visible replacement...
1292 */
1293 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1294 if (srow)
1295 srow += (int) (unichar & 0xff);
1296 if ((srow == NULL) || (*srow == 0))
1297 *dest = '?';
1298 else
1299 *dest = (char) (*srow);
1300 dest ++;
1301 }
1302 *dest = '\0';
1303 worklen = (int) (dest - start);
1304 cupsCharmapFree(encoding);
1305 return (worklen);
1306}
1307
1308/*
1309 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1310 */
1311static int /* O - Count or -1 on error */
1312conv_utf8_to_vbcs(char *dest, /* O - Target string */
1313 const cups_utf8_t *src, /* I - Source string */
1314 const int maxout, /* I - Max output */
1315 const cups_encoding_t encoding) /* I - Encoding */
1316{
1317 char *start = dest; /* Start of destination string */
1318 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
1319 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1320 cups_utf32_t unichar; /* Character value */
1321 cups_vbcs_t legchar; /* Legacy character value */
1322 int worklen; /* Internal UCS-4 string length */
1323 cups_utf32_t work[CUPS_MAX_USTRING];
1324 /* Internal UCS-4 string */
1325 int i; /* Looping variable */
1326
1327 /*
1328 * Check for valid arguments and clear output...
1329 */
1330 if ((dest == NULL)
1331 || (src == NULL)
1332 || (maxout < 1)
1333 || (maxout > CUPS_MAX_USTRING)
1334 || (encoding == CUPS_UTF8))
1335 return (-1);
1336 *dest = '\0';
1337
1338 /*
1339 * Find legacy charset map in cache...
1340 */
1341 vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
1342 if (vmap == NULL)
1343 return (-1);
1344
1345 /*
1346 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1347 */
1348 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1349 if (worklen < 0)
1350 return (-1);
1351
1352 /*
1353 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1354 */
1355 for (i = 0; i < worklen;)
1356 {
1357 unichar = work[i];
1358 if (unichar == 0)
1359 break;
1360 i ++;
1361
1362 /*
1363 * Check for leading BOM (and delete from output)...
1364 */
1365 if ((i == 1) && (unichar == 0xfeff))
1366 continue;
1367
1368 /*
1369 * Convert ASCII verbatim (optimization)...
1370 */
1371 if (unichar <= 0x7f)
1372 {
1373 *dest = (char) unichar;
1374 dest ++;
1375 continue;
1376 }
1377
1378 /*
1379 * Convert unknown character to visible replacement...
1380 */
1381 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1382 if (vrow)
1383 vrow += (int) (unichar & 0xff);
1384 if ((vrow == NULL) || (*vrow == 0))
1385 legchar = (cups_vbcs_t) '?';
1386 else
1387 legchar = (cups_vbcs_t) *vrow;
1388
1389 /*
1390 * Save n-byte legacy character...
1391 */
1392 if (legchar > 0xffffff)
1393 {
1394 *dest = (char) ((legchar >> 24) & 0xff);
1395 dest++;
1396 }
1397 if (legchar > 0xffff)
1398 {
1399 *dest = (char) ((legchar >> 16) & 0xff);
1400 dest++;
1401 }
1402 if (legchar > 0xff)
1403 {
1404 *dest = (char) ((legchar >> 8) & 0xff);
1405 dest++;
1406 }
1407 *dest = (char) (legchar & 0xff);
1408 dest ++;
1409 }
1410 *dest = '\0';
1411 worklen = (int) (dest - start);
1412 cupsCharmapFree(encoding);
1413 return (worklen);
1414}
1415
1416/*
1417 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1418 */
1419static int /* O - Count or -1 on error */
1420conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1421 const char *src, /* I - Source string */
1422 const int maxout, /* I - Max output */
1423 const cups_encoding_t encoding) /* I - Encoding */
1424{
1425 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1426 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1427 unsigned long legchar; /* Legacy character value */
1428 cups_utf32_t unichar; /* Unicode character value */
1429 int worklen; /* Internal UCS-4 string length */
1430 cups_utf32_t work[CUPS_MAX_USTRING];
1431 /* Internal UCS-4 string */
1432 int i; /* Looping variable */
1433
1434 /*
1435 * Check for valid arguments and clear output...
1436 */
1437 if ((dest == NULL)
1438 || (src == NULL)
1439 || (maxout < 1)
1440 || (maxout > CUPS_MAX_USTRING)
1441 || (encoding == CUPS_UTF8))
1442 return (-1);
1443 *dest = '\0';
1444
1445 /*
1446 * Find legacy charset map in cache...
1447 */
1448 cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
1449 if (cmap == NULL)
1450 return (-1);
1451
1452 /*
1453 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1454 */
1455 work[0] = 0xfeff;
1456 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1457 {
1458 if (*src == '\0')
1459 break;
1460 legchar = (unsigned long) *src;
1461
1462 /*
1463 * Convert ASCII verbatim (optimization)...
1464 */
1465 if (legchar <= 0x7f)
1466 {
1467 work[i] = (cups_utf32_t) legchar;
1468 i ++;
1469 continue;
1470 }
1471
1472 /*
1473 * Convert unknown character to Replacement Character...
1474 */
1475 crow = &cmap->char2uni[0];
1476 crow += (int) legchar;
1477 if (*crow == 0)
1478 unichar = 0xfffd;
1479 else
1480 unichar = (cups_utf32_t) *crow;
1481 work[i] = unichar;
1482 i ++;
1483 }
1484 work[i] = 0;
1485
1486 /*
1487 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1488 */
1489 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1490 cupsCharmapFree(encoding);
1491 return (worklen);
1492}
1493
1494
1495/*
1496 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1497 */
1498static int /* O - Count or -1 on error */
1499conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1500 const char *src, /* I - Source string */
1501 const int maxout, /* I - Max output */
1502 const cups_encoding_t encoding) /* I - Encoding */
1503{
1504 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1505 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1506 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1507 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1508 cups_vbcs_t legchar; /* Legacy character value */
1509 cups_utf32_t unichar; /* Unicode character value */
1510 int i; /* Looping variable */
1511 int worklen; /* Internal UCS-4 string length */
1512 cups_utf32_t work[CUPS_MAX_USTRING];
1513 /* Internal UCS-4 string */
1514
1515 /*
1516 * Check for valid arguments and clear output...
1517 */
1518 if ((dest == NULL)
1519 || (src == NULL)
1520 || (maxout < 1)
1521 || (maxout > CUPS_MAX_USTRING)
1522 || (encoding == CUPS_UTF8))
1523 return (-1);
1524 *dest = '\0';
1525
1526 /*
1527 * Find legacy charset map in cache...
1528 */
1529 vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
1530 if (vmap == NULL)
1531 return (-1);
1532
1533 /*
1534 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1535 */
1536 work[0] = 0xfeff;
1537 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1538 {
1539 if (*src == '\0')
1540 break;
1541 legchar = (cups_vbcs_t) *src;
1542 leadchar = (cups_sbcs_t) *src;
1543
1544 /*
1545 * Convert ASCII verbatim (optimization)...
1546 */
1547 if (legchar <= 0x7f)
1548 {
1549 work[i] = (cups_utf32_t) legchar;
1550 i ++;
1551 continue;
1552 }
1553
1554 /*
1555 * Convert 2-byte legacy character...
1556 */
1557 if (vmap->lead2char[(int) leadchar] == leadchar)
1558 {
1559 src ++;
1560 if (*src == '\0')
1561 return (-1);
1562 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1563
1564 /*
1565 * Convert unknown character to Replacement Character...
1566 */
1567 crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1568 if (crow)
1569 crow += (int) (legchar & 0xff);
1570 if ((crow == NULL) || (*crow == 0))
1571 unichar = 0xfffd;
1572 else
1573 unichar = (cups_utf32_t) *crow;
1574 work[i] = unichar;
1575 i ++;
1576 continue;
1577 }
1578
1579 /*
1580 * Fetch 3-byte or 4-byte legacy character...
1581 */
1582 if (vmap->lead3char[(int) leadchar] == leadchar)
1583 {
1584 src ++;
1585 if (*src == '\0')
1586 return (-1);
1587 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1588 src ++;
1589 if (*src == '\0')
1590 return (-1);
1591 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1592 }
1593 else if (vmap->lead4char[(int) leadchar] == leadchar)
1594 {
1595 src ++;
1596 if (*src == '\0')
1597 return (-1);
1598 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1599 src ++;
1600 if (*src == '\0')
1601 return (-1);
1602 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1603 src ++;
1604 if (*src == '\0')
1605 return (-1);
1606 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1607 }
1608 else
1609 return (-1);
1610
1611 /*
1612 * Find 3-byte or 4-byte legacy character...
1613 */
1614 wide2uni = vmap->wide2uni;
1615 wide2uni = (_cups_wide2uni_t *) bsearch(&legchar,
1616 vmap->wide2uni,
1617 vmap->widecount,
1618 sizeof(_cups_wide2uni_t),
1619 compare_wide);
1620
1621 /*
1622 * Convert unknown character to Replacement Character...
1623 */
1624 if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1625 unichar = 0xfffd;
1626 else
1627 unichar = wide2uni->unichar;
1628 work[i] = unichar;
1629 i ++;
1630 }
1631 work[i] = 0;
1632
1633 /*
1634 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1635 */
1636 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1637 cupsCharmapFree(encoding);
1638 return (worklen);
1639}
1640
1641/*
1642 * 'compare_wide()' - Compare key for wide (VBCS) match.
1643 */
1644static int
1645compare_wide(const void *k1, /* I - Key char */
1646 const void *k2) /* I - Map char */
1647{
1648 cups_vbcs_t *kp = (cups_vbcs_t *) k1;
1649 /* Key char pointer */
1650 _cups_wide2uni_t *mp = (_cups_wide2uni_t *) k2;
1651 /* Map char pointer */
1652 cups_vbcs_t key; /* Legacy key character */
1653 cups_vbcs_t map; /* Legacy map character */
1654 int result; /* Result Value */
1655
1656 key = *kp;
1657 map = mp->widechar;
1658 if (key >= map)
1659 result = (int) (key - map);
1660 else
1661 result = -1 * ((int) (map - key));
1662 return (result);
1663}
1664
1665
1666/*
1667 * End of "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $"
1668 */