]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
05d404bbc0bbd00b5298e8ee316a1ac41ff67abb
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id$"
3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2005 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
45 */
46
47 /*
48 * Include necessary headers...
49 */
50
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <errno.h>
55 #include <ctype.h>
56 #include <time.h>
57
58 #include "language.h"
59 #include "string.h"
60 #include "transcode.h"
61
62
63 /*
64 * Local Globals...
65 */
66
67 static cups_cmap_t *cmap_cache = NULL; /* SBCS Charmap Cache */
68 static cups_vmap_t *vmap_cache = NULL; /* VBCS Charmap Cache */
69
70 /*
71 * Prototypes...
72 */
73
74 static int get_charmap_count(const char *filename);
75 static cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
76 const char *filename);
77 static cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
78 const char *filename);
79
80 static int conv_utf8_to_sbcs(char *dest,
81 const cups_utf8_t *src,
82 const int maxout,
83 const cups_encoding_t encoding);
84 static int conv_utf8_to_vbcs(char *dest,
85 const cups_utf8_t *src,
86 const int maxout,
87 const cups_encoding_t encoding);
88
89 static int conv_sbcs_to_utf8(cups_utf8_t *dest,
90 const char *src,
91 const int maxout,
92 const cups_encoding_t encoding);
93 static int conv_vbcs_to_utf8(cups_utf8_t *dest,
94 const char *src,
95 const int maxout,
96 const cups_encoding_t encoding);
97
98 static int compare_wide(const void *k1, const void *k2);
99
100 /*
101 * 'cupsCharmapGet()' - Get a character set map.
102 *
103 * This code handles single-byte (SBCS), double-byte (DBCS), and
104 * variable-byte (VBCS) character sets _without_ charset escapes...
105 * This code does not handle multiple-byte character sets (MBCS)
106 * (such as ISO-2022-JP) with charset switching via escapes...
107 */
108 void * /* O - Charset map pointer */
109 cupsCharmapGet(const cups_encoding_t encoding)
110 /* I - Encoding */
111 {
112 char *datadir; /* CUPS_DATADIR environment variable */
113 char mapname[80]; /* Name of charset map */
114 char filename[256]; /* Filename for charset map file */
115
116 /*
117 * Check for valid arguments...
118 */
119 if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
120 return (NULL);
121
122 /*
123 * Get the data directory and charset map name...
124 */
125 if ((datadir = getenv("CUPS_DATADIR")) == NULL)
126 datadir = CUPS_DATADIR;
127 snprintf(mapname, sizeof(mapname), "%s.txt", cupsEncodingName(encoding));
128 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
129 datadir, mapname);
130
131 /*
132 * Read charset map input file into cache...
133 */
134 if (encoding < CUPS_ENCODING_SBCS_END)
135 return (get_sbcs_charmap(encoding, filename));
136 else if (encoding < CUPS_ENCODING_VBCS_END)
137 return (get_vbcs_charmap(encoding, filename));
138 else
139 return (NULL);
140 }
141
142 /*
143 * 'cupsCharmapFree()' - Free a character set map.
144 *
145 * This does not actually free; use 'cupsCharmapFlush()' for that.
146 */
147 void
148 cupsCharmapFree(const cups_encoding_t encoding)
149 /* I - Encoding */
150 {
151 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
152 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
153
154 /*
155 * See if we already have this SBCS charset map loaded...
156 */
157 for (cmap = cmap_cache; cmap != NULL; cmap = cmap->next)
158 {
159 if (cmap->encoding == encoding)
160 {
161 if (cmap->used > 0)
162 cmap->used --;
163 return;
164 }
165 }
166
167 /*
168 * See if we already have this DBCS/VBCS charset map loaded...
169 */
170 for (vmap = vmap_cache; vmap != NULL; vmap = vmap->next)
171 {
172 if (vmap->encoding == encoding)
173 {
174 if (vmap->used > 0)
175 vmap->used --;
176 return;
177 }
178 }
179 return;
180 }
181
182 /*
183 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
184 */
185 void
186 cupsCharmapFlush(void)
187 {
188 int i; /* Looping variable */
189 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
190 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
191 cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */
192 cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */
193 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
194 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
195 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
196
197 /*
198 * Loop through SBCS charset map cache, free all memory...
199 */
200 for (cmap = cmap_cache; cmap != NULL; cmap = cnext)
201 {
202 for (i = 0; i < 256; i ++)
203 {
204 if ((srow = cmap->uni2char[i]) != NULL)
205 free(srow);
206 }
207 cnext = cmap->next;
208 free(cmap);
209 }
210 cmap_cache = NULL;
211
212 /*
213 * Loop through DBCS/VBCS charset map cache, free all memory...
214 */
215 for (vmap = vmap_cache; vmap != NULL; vmap = vnext)
216 {
217 for (i = 0; i < 256; i ++)
218 {
219 if ((crow = vmap->char2uni[i]) != NULL)
220 free(crow);
221 }
222 for (i = 0; i < 256; i ++)
223 {
224 if ((vrow = vmap->uni2char[i]) != NULL)
225 free(vrow);
226 }
227 if (vmap->wide2uni)
228 free(vmap->wide2uni);
229 vnext = vmap->next;
230 free(vmap);
231 }
232 vmap_cache = NULL;
233 return;
234 }
235
236 /*
237 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
238 *
239 * This code handles single-byte (SBCS), double-byte (DBCS), and
240 * variable-byte (VBCS) character sets _without_ charset escapes...
241 * This code does not handle multiple-byte character sets (MBCS)
242 * (such as ISO-2022-JP) with charset switching via escapes...
243 */
244 int /* O - Count or -1 on error */
245 cupsUTF8ToCharset(char *dest, /* O - Target string */
246 const cups_utf8_t *src, /* I - Source string */
247 const int maxout, /* I - Max output */
248 const cups_encoding_t encoding) /* I - Encoding */
249 {
250 /*
251 * Check for valid arguments...
252 */
253 if ((dest == NULL)
254 || (src == NULL)
255 || (maxout < 1)
256 || (maxout > CUPS_MAX_USTRING)
257 || (encoding < 0)
258 || (encoding == CUPS_UTF8)
259 || (encoding >= CUPS_ENCODING_VBCS_END))
260 return (-1);
261
262 /*
263 * Convert input UTF-8 to legacy charset...
264 */
265 if (encoding < CUPS_ENCODING_SBCS_END)
266 return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
267 else if (encoding < CUPS_ENCODING_VBCS_END)
268 return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
269 else
270 return (-1);
271 }
272
273 /*
274 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
275 *
276 * This code handles single-byte (SBCS), double-byte (DBCS), and
277 * variable-byte (VBCS) character sets _without_ charset escapes...
278 * This code does not handle multiple-byte character sets (MBCS)
279 * (such as ISO-2022-JP) with charset switching via escapes...
280 */
281 int /* O - Count or -1 on error */
282 cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */
283 const char *src, /* I - Source string */
284 const int maxout, /* I - Max output */
285 const cups_encoding_t encoding) /* I - Encoding */
286 {
287 /*
288 * Check for valid arguments...
289 */
290 if ((dest == NULL)
291 || (src == NULL)
292 || (maxout < 1)
293 || (maxout > CUPS_MAX_USTRING)
294 || (encoding < 0)
295 || (encoding == CUPS_UTF8)
296 || (encoding >= CUPS_ENCODING_VBCS_END))
297 return (-1);
298
299 /*
300 * Convert input legacy charset to UTF-8...
301 */
302 if (encoding < CUPS_ENCODING_SBCS_END)
303 return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
304 else if (encoding < CUPS_ENCODING_VBCS_END)
305 return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
306 else
307 return (-1);
308 }
309
310 /*
311 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
312 *
313 * This code does not support Unicode beyond 16-bits (Plane 0)...
314 */
315 int /* O - Count or -1 on error */
316 cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */
317 const cups_utf8_t *src, /* I - Source string */
318 const int maxout) /* I - Max output */
319 {
320 int worklen; /* Internal UCS-4 string length */
321 cups_utf32_t work[CUPS_MAX_USTRING];
322 /* Internal UCS-4 string */
323
324 /*
325 * Check for valid arguments and clear output...
326 */
327 if ((dest == NULL)
328 || (src == NULL)
329 || (maxout < 1)
330 || (maxout > CUPS_MAX_USTRING))
331 return (-1);
332 *dest = 0;
333
334 /*
335 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
336 */
337 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
338 if (worklen < 0)
339 return (-1);
340
341 /*
342 * Convert internal UCS-4 to output UTF-16...
343 */
344 worklen = cupsUTF32ToUTF16(dest, work, maxout);
345 return (worklen);
346 }
347
348 /*
349 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
350 *
351 * This code does not support Unicode beyond 16-bits (Plane 0)...
352 */
353 int /* O - Count or -1 on error */
354 cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */
355 const cups_utf16_t *src, /* I - Source string */
356 const int maxout) /* I - Max output */
357 {
358 int worklen; /* Internal UCS-4 string length */
359 cups_utf32_t work[CUPS_MAX_USTRING];
360 /* Internal UCS-4 string */
361
362 /*
363 * Check for valid arguments and clear output...
364 */
365 if ((dest == NULL)
366 || (src == NULL)
367 || (maxout < 1)
368 || (maxout > CUPS_MAX_USTRING))
369 return (-1);
370 *dest = 0;
371
372 /*
373 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
374 */
375 worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
376 if (worklen < 0)
377 return (-1);
378
379 /*
380 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
381 */
382 worklen = cupsUTF32ToUTF8(dest, work, maxout);
383 return (worklen);
384 }
385
386 /*
387 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
388 *
389 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
390 *
391 * UTF-32 char UTF-8 char(s)
392 * --------------------------------------------------
393 * 0 to 127 = 0xxxxxxx (US-ASCII)
394 * 128 to 2047 = 110xxxxx 10yyyyyy
395 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
396 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
397 *
398 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
399 * which would convert to five- or six-octet UTF-8 sequences...
400 *
401 * This code does not support Unicode beyond 16-bits (Plane 0)...
402 */
403 int /* O - Count or -1 on error */
404 cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */
405 const cups_utf8_t *src, /* I - Source string */
406 const int maxout) /* I - Max output */
407 {
408 cups_utf8_t *first = (cups_utf8_t *) src;
409 int srclen; /* Source string length */
410 int i; /* Looping variable */
411 cups_utf32_t ch; /* Character value */
412 cups_utf32_t next; /* Next character value */
413 cups_utf32_t ch32; /* UTF-32 character value */
414
415 /*
416 * Check for valid arguments and clear output...
417 */
418 if ((dest == NULL)
419 || (src == NULL)
420 || (maxout < 1)
421 || (maxout > CUPS_MAX_USTRING))
422 return (-1);
423 *dest = 0;
424
425 /*
426 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
427 */
428 *dest = 0xfeff;
429 dest ++;
430 srclen = strlen((char *) src);
431 for (i = 1; i < (maxout - 1); src ++, dest ++)
432 {
433 ch = (cups_utf32_t) *src;
434 ch &= 0xff;
435 if (ch == 0)
436 break;
437 i ++;
438
439 /*
440 * Convert UTF-8 character(s) to UTF-32 character...
441 */
442 if ((ch & 0x7f) == ch)
443 {
444 /*
445 * One-octet UTF-8 <= 127 (US-ASCII)...
446 */
447 *dest = ch;
448 }
449 else if ((ch & 0xe0) == 0xc0)
450 {
451 /*
452 * Two-octet UTF-8 <= 2047 (Latin-x)...
453 */
454 src ++;
455 next = (cups_utf32_t) *src;
456 next &= 0xff;
457 if (next == 0)
458 return (-1);
459 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
460
461 /*
462 * Check for non-shortest form (invalid UTF-8)...
463 */
464 if (ch32 <= 127)
465 return (-1);
466 *dest = ch32;
467 }
468 else if ((ch & 0xf0) == 0xe0)
469 {
470 /*
471 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
472 */
473 src ++;
474 next = (cups_utf32_t) *src;
475 next &= 0xff;
476 if (next == 0)
477 return (-1);
478 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
479 src ++;
480 next = (cups_utf32_t) *src;
481 next &= 0xff;
482 if (next == 0)
483 return (-1);
484 ch32 = ((ch32 << 6) | (next & 0x3f));
485
486 /*
487 * Check for non-shortest form (invalid UTF-8)...
488 */
489 if (ch32 <= 2047)
490 return (-1);
491 *dest = ch32;
492 }
493 else if ((ch & 0xf8) == 0xf0)
494 {
495 /*
496 * Four-octet UTF-8 to Replacement Character...
497 */
498 if (((src - first) + 3) >= srclen)
499 return (-1);
500 src += 3;
501 *dest = 0xfffd;
502 }
503 else if ((ch & 0xfc) == 0xf8)
504 {
505 /*
506 * Five-octet UTF-8 (invalid strict UTF-32)...
507 */
508 return (-1);
509 }
510 else if ((ch & 0xfe) == 0xfc)
511 {
512 /*
513 * Six-octet UTF-8 (invalid strict UTF-32)...
514 */
515 return (-1);
516 }
517 else
518 {
519 /*
520 * More than six-octet (invalid UTF-8 sequence)...
521 */
522 return (-1);
523 }
524
525 /*
526 * Check for UTF-16 surrogate (illegal UTF-8)...
527 */
528 if ((*dest >= 0xd800) && (*dest <= 0xdfff))
529 return (-1);
530
531 /*
532 * Check for beyond Plane 16 (invalid UTF-8)...
533 */
534 if (*dest > 0x10ffff)
535 return (-1);
536 }
537 *dest = 0;
538 return (i);
539 }
540
541 /*
542 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
543 *
544 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
545 *
546 * UTF-32 char UTF-8 char(s)
547 * --------------------------------------------------
548 * 0 to 127 = 0xxxxxxx (US-ASCII)
549 * 128 to 2047 = 110xxxxx 10yyyyyy
550 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
551 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
552 *
553 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
554 * which would convert to five- or six-octet UTF-8 sequences...
555 *
556 * This code does not support Unicode beyond 16-bits (Plane 0)...
557 */
558 int /* O - Count or -1 on error */
559 cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */
560 const cups_utf32_t *src, /* I - Source string */
561 const int maxout) /* I - Max output */
562 {
563 cups_utf32_t *first = (cups_utf32_t *) src;
564 /* First source char */
565 cups_utf8_t *start = dest; /* Start of destination string */
566 int i; /* Looping variable */
567 int swap = 0; /* Byte-swap input to output */
568 cups_utf32_t ch; /* Character value */
569
570 /*
571 * Check for valid arguments and clear output...
572 */
573 if ((dest == NULL)
574 || (src == NULL)
575 || (maxout < 1))
576 return (-1);
577 *dest = '\0';
578
579 /*
580 * Check for leading BOM in UTF-32 and inverted BOM...
581 */
582 if (*src == 0xfffe0000)
583 swap = 1;
584
585 /*
586 * Convert input UTF-32 to output UTF-8...
587 */
588 for (i = 0; i < (maxout - 1); src ++)
589 {
590 ch = *src;
591 if (ch == 0)
592 break;
593
594 /*
595 * Byte swap input UTF-32, if necessary...
596 */
597 if (swap)
598 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
599
600 /*
601 * Check for leading BOM (and delete from output)...
602 */
603 if ((src == first) && (ch == 0xfeff))
604 continue;
605
606 /*
607 * Check for beyond Plane 16 (invalid UTF-32)...
608 */
609 if (ch > 0x10ffff)
610 return (-1);
611
612 /*
613 * Convert beyond Plane 0 (BMP) to Replacement Character...
614 */
615 if (ch > 0xffff)
616 ch = 0xfffd;
617
618 /*
619 * Convert UTF-32 character to UTF-8 character(s)...
620 */
621 if (ch <= 0x7f)
622 {
623 /*
624 * One-octet UTF-8 <= 127 (US-ASCII)...
625 */
626 *dest = (cups_utf8_t) ch;
627 dest ++;
628 i ++;
629 }
630 else if (ch <= 0x7ff)
631 {
632 /*
633 * Two-octet UTF-8 <= 2047 (Latin-x)...
634 */
635 if (i > (maxout - 2))
636 break;
637 *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
638 dest ++;
639 i ++;
640 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
641 dest ++;
642 i ++;
643 }
644 else
645 {
646 /*
647 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
648 */
649 if (i > (maxout - 3))
650 break;
651 *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
652 dest ++;
653 i ++;
654 *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
655 dest ++;
656 i ++;
657 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
658 dest ++;
659 i ++;
660 }
661 }
662 *dest = '\0';
663 i = (int) (dest - start);
664 return (i);
665 }
666
667 /*
668 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
669 *
670 * This code does not support Unicode beyond 16-bits (Plane 0)...
671 */
672 int /* O - Count or -1 on error */
673 cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */
674 const cups_utf16_t *src, /* I - Source string */
675 const int maxout) /* I - Max output */
676 {
677 int i; /* Looping variable */
678 int swap = 0; /* Byte-swap input to output */
679 int surrogate = 0; /* Expecting low-half surrogate */
680 cups_utf32_t ch; /* Character value */
681
682 /*
683 * Check for valid arguments and clear output...
684 */
685 if ((dest == NULL)
686 || (src == NULL)
687 || (maxout < 1)
688 || (maxout > CUPS_MAX_USTRING))
689 return (-1);
690 *dest = 0;
691
692 /*
693 * Check for leading BOM in UTF-16 and inverted BOM...
694 */
695 if (*src == 0xfffe)
696 swap = 1;
697
698 /*
699 * Convert input UTF-16 to output UTF-32...
700 */
701 for (i = 0; i < (maxout - 1); src ++)
702 {
703 ch = (cups_utf32_t) (*src & 0xffff);
704 if (ch == 0)
705 break;
706 i ++;
707
708 /*
709 * Byte swap input UTF-16, if necessary...
710 */
711 if (swap)
712 ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
713
714 /*
715 * Discard expected UTF-16 low-half surrogate...
716 */
717 if ((ch >= 0xdc00) && (ch <= 0xdfff))
718 {
719 if (surrogate == 0)
720 return (-1);
721 surrogate = 0;
722 continue;
723 }
724
725 /*
726 * Convert UTF-16 high-half surrogate to Replacement Character...
727 */
728 if ((ch >= 0xd800) && (ch <= 0xdbff))
729 {
730 if (surrogate == 1)
731 return (-1);
732 surrogate = 1;
733 ch = 0xfffd;
734 }
735 *dest = ch;
736 dest ++;
737 }
738 *dest = 0;
739 return (i);
740 }
741
742 /*
743 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
744 *
745 * This code does not support Unicode beyond 16-bits (Plane 0)...
746 */
747 int /* O - Count or -1 on error */
748 cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */
749 const cups_utf32_t *src, /* I - Source string */
750 const int maxout) /* I - Max output */
751 {
752 int i; /* Looping variable */
753 int swap = 0; /* Byte-swap input to output */
754 cups_utf32_t ch; /* Character value */
755
756 /*
757 * Check for valid arguments and clear output...
758 */
759 if ((dest == NULL)
760 || (src == NULL)
761 || (maxout < 1)
762 || (maxout > CUPS_MAX_USTRING))
763 return (-1);
764 *dest = 0;
765
766 /*
767 * Check for leading BOM in UTF-32 and inverted BOM...
768 */
769 if (*src == 0xfffe0000)
770 swap = 1;
771
772 /*
773 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
774 */
775 for (i = 0; i < (maxout - 1); src ++, dest ++)
776 {
777 ch = *src;
778 if (ch == 0)
779 break;
780 i ++;
781
782 /*
783 * Byte swap input UTF-32, if necessary...
784 */
785 if (swap)
786 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
787
788 /*
789 * Check for UTF-16 surrogate (illegal UTF-32)...
790 */
791 if ((ch >= 0xd800) && (ch <= 0xdfff))
792 return (-1);
793
794 /*
795 * Check for beyond Plane 16 (invalid UTF-32)...
796 */
797 if (ch > 0x10ffff)
798 return (-1);
799
800 /*
801 * Convert beyond Plane 0 (BMP) to Replacement Character...
802 */
803 if (ch > 0xffff)
804 ch = 0xfffd;
805 *dest = (cups_utf16_t) ch;
806 }
807 *dest = 0;
808 return (i);
809 }
810
811 /*
812 * 'get_charmap_count()' - Count lines in a charmap file.
813 */
814 static int /* O - Count or -1 on error */
815 get_charmap_count(const char *filename) /* I - Charmap Filename */
816 {
817 int i; /* Looping variable */
818 FILE *fp; /* Map input file pointer */
819 char *s; /* Line parsing pointer */
820 char line[256]; /* Line from input map file */
821 cups_utf32_t unichar; /* Unicode character value */
822
823 /*
824 * Open map input file...
825 */
826 if ((filename == NULL) || (*filename == '\0'))
827 return (-1);
828 fp = fopen(filename, "r");
829 if (fp == NULL)
830 return (-1);
831
832 /*
833 * Count lines in map input file...
834 */
835 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
836 {
837 s = fgets(&line[0], sizeof(line), fp);
838 if (s == NULL)
839 break;
840 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
841 continue;
842 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
843 s ++;
844 while ((*s == ' ') || (*s == '\t'))
845 s ++;
846 if (strncmp (s, "0x", 2) == 0)
847 s += 2;
848 if ((sscanf(s, "%lx", &unichar) != 1)
849 || (unichar > 0xffff))
850 {
851 fclose(fp);
852 return (-1);
853 }
854 i ++;
855 }
856 if (i == 0)
857 i = -1;
858
859 /*
860 * Close file and return charmap count (non-comment line count)...
861 */
862 fclose(fp);
863 return (i);
864 }
865
866 /*
867 * 'get_sbcs_charmap()' - Get SBCS Charmap.
868 */
869 static cups_cmap_t * /* O - Charmap or 0 on error */
870 get_sbcs_charmap(const cups_encoding_t encoding,
871 /* I - Charmap Encoding */
872 const char *filename) /* I - Charmap Filename */
873 {
874 int i; /* Loop variable */
875 unsigned long legchar; /* Legacy character value */
876 cups_utf32_t unichar; /* Unicode character value */
877 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
878 FILE *fp; /* Charset map file pointer */
879 char *s; /* Line parsing pointer */
880 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
881 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
882 char line[256]; /* Line from charset map file */
883
884 /*
885 * Check for valid arguments...
886 */
887 if ((encoding < 0) || (filename == NULL))
888 return (NULL);
889
890 /*
891 * See if we already have this SBCS charset map loaded...
892 */
893 for (cmap = cmap_cache; cmap != NULL; cmap = cmap->next)
894 {
895 if (cmap->encoding == encoding)
896 {
897 cmap->used ++;
898 return ((void *) cmap);
899 }
900 }
901
902 /*
903 * Open SBCS charset map input file...
904 */
905 fp = fopen(filename, "r");
906 if (fp == NULL)
907 return (NULL);
908
909 /*
910 * Allocate memory for SBCS charset map and add to cache...
911 */
912 cmap = (cups_cmap_t *) calloc(1, sizeof(cups_cmap_t));
913 if (cmap == NULL)
914 {
915 fclose(fp);
916 return (NULL);
917 }
918 cmap->next = cmap_cache;
919 cmap_cache = cmap;
920 cmap->used ++;
921 cmap->encoding = encoding;
922
923 /*
924 * Save SBCS charset map into memory for transcoding...
925 */
926 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
927 {
928 s = fgets(&line[0], sizeof(line), fp);
929 if (s == NULL)
930 break;
931 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
932 continue;
933 if (strncmp (s, "0x", 2) == 0)
934 s += 2;
935 if ((sscanf(s, "%lx", &legchar) != 1)
936 || (legchar > 0xff))
937 {
938 fclose(fp);
939 cupsCharmapFlush();
940 return (NULL);
941 }
942 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
943 s ++;
944 while ((*s == ' ') || (*s == '\t'))
945 s ++;
946 if (strncmp (s, "0x", 2) == 0)
947 s += 2;
948 if (sscanf(s, "%lx", &unichar) != 1)
949 {
950 fclose(fp);
951 cupsCharmapFlush();
952 return (NULL);
953 }
954 i ++;
955
956 /*
957 * Convert beyond Plane 0 (BMP) to Replacement Character...
958 */
959 if (unichar > 0xffff)
960 unichar = 0xfffd;
961
962 /*
963 * Save legacy to Unicode mapping in direct lookup table...
964 */
965 crow = &cmap->char2uni[(int) legchar];
966 *crow = (cups_ucs2_t) (unichar & 0xffff);
967
968 /*
969 * Save Unicode to legacy mapping in indirect lookup table...
970 */
971 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
972 if (srow == NULL)
973 {
974 srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
975 if (srow == NULL)
976 {
977 fclose(fp);
978 cupsCharmapFlush();
979 return (NULL);
980 }
981 cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
982 }
983 srow += (int) (unichar & 0xff);
984
985 /*
986 * Convert Replacement Character to visible replacement...
987 */
988 if (unichar == 0xfffd)
989 legchar = (unsigned long) '?';
990
991 /*
992 * First (oldest) legacy character uses Unicode mapping cell...
993 */
994 if (*srow == 0)
995 *srow = (cups_sbcs_t) legchar;
996 }
997 fclose(fp);
998 return (cmap);
999 }
1000
1001 /*
1002 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1003 */
1004 static cups_vmap_t * /* O - Charmap or 0 on error */
1005 get_vbcs_charmap(const cups_encoding_t encoding,
1006 /* I - Charmap Encoding */
1007 const char *filename) /* I - Charmap Filename */
1008 {
1009 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1010 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1011 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1012 cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1013 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1014 unsigned long legchar; /* Legacy character value */
1015 cups_utf32_t unichar; /* Unicode character value */
1016 int mapcount; /* Count of lines in charmap file */
1017 FILE *fp; /* Charset map file pointer */
1018 char *s; /* Line parsing pointer */
1019 char line[256]; /* Line from charset map file */
1020 int i; /* Loop variable */
1021 int wide; /* 32-bit legacy char */
1022
1023 /*
1024 * Check for valid arguments...
1025 */
1026 if ((encoding < 0) || (filename == NULL))
1027 return (NULL);
1028
1029 /*
1030 * See if we already have this DBCS/VBCS charset map loaded...
1031 */
1032 for (vmap = vmap_cache; vmap != NULL; vmap = vmap->next)
1033 {
1034 if (vmap->encoding == encoding)
1035 {
1036 vmap->used ++;
1037 return ((void *) vmap);
1038 }
1039 }
1040
1041 /*
1042 * Count lines in charmap file...
1043 */
1044 mapcount = get_charmap_count(filename);
1045 if (mapcount <= 0)
1046 return (NULL);
1047
1048 /*
1049 * Open VBCS charset map input file...
1050 */
1051 fp = fopen(filename, "r");
1052 if (fp == NULL)
1053 return (NULL);
1054
1055 /*
1056 * Allocate memory for DBCS/VBCS charset map and add to cache...
1057 */
1058 vmap = (cups_vmap_t *) calloc(1, sizeof(cups_vmap_t));
1059 if (vmap == NULL)
1060 {
1061 fclose(fp);
1062 return (NULL);
1063 }
1064 vmap->next = vmap_cache;
1065 vmap_cache = vmap;
1066 vmap->used ++;
1067 vmap->encoding = encoding;
1068
1069 /*
1070 * Save DBCS/VBCS charset map into memory for transcoding...
1071 */
1072 leadchar = 0;
1073 wide2uni = NULL;
1074
1075 for (i = 0, wide = 0; i < mapcount; )
1076 {
1077 s = fgets(&line[0], sizeof(line), fp);
1078 if (s == NULL)
1079 break;
1080 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1081 continue;
1082 if (strncmp (s, "0x", 2) == 0)
1083 s += 2;
1084 if ((sscanf(s, "%lx", &legchar) != 1)
1085 || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1086 {
1087 fclose(fp);
1088 cupsCharmapFlush();
1089 return (NULL);
1090 }
1091 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1092 s ++;
1093 while ((*s == ' ') || (*s == '\t'))
1094 s ++;
1095 if (strncmp (s, "0x", 2) == 0)
1096 s += 2;
1097 if (sscanf(s, "%lx", &unichar) != 1)
1098 {
1099 fclose(fp);
1100 cupsCharmapFlush();
1101 return (NULL);
1102 }
1103 i ++;
1104
1105 /*
1106 * Convert beyond Plane 0 (BMP) to Replacement Character...
1107 */
1108 if (unichar > 0xffff)
1109 unichar = 0xfffd;
1110
1111 /*
1112 * Save lead char of 2/3/4-byte legacy char...
1113 */
1114 if ((legchar > 0xff) && (legchar <= 0xffff))
1115 {
1116 leadchar = (cups_sbcs_t) (legchar >> 8);
1117 vmap->lead2char[leadchar] = leadchar;
1118 }
1119 if ((legchar > 0xffff) && (legchar <= 0xffffff))
1120 {
1121 leadchar = (cups_sbcs_t) (legchar >> 16);
1122 vmap->lead3char[leadchar] = leadchar;
1123 }
1124 if (legchar > 0xffffff)
1125 {
1126 leadchar = (cups_sbcs_t) (legchar >> 24);
1127 vmap->lead4char[leadchar] = leadchar;
1128 }
1129
1130 /*
1131 * Save Legacy to Unicode mapping...
1132 */
1133 if (legchar <= 0xffff)
1134 {
1135 /*
1136 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1137 */
1138 crow = vmap->char2uni[(int) leadchar];
1139 if (crow == NULL)
1140 {
1141 crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1142 if (crow == NULL)
1143 {
1144 fclose(fp);
1145 cupsCharmapFlush();
1146 return (NULL);
1147 }
1148 vmap->char2uni[(int) leadchar] = crow;
1149 }
1150 crow += (int) (legchar & 0xff);
1151 *crow = (cups_vbcs_t) unichar;
1152 }
1153 else
1154 {
1155 /*
1156 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1157 */
1158 if (wide == 0)
1159 {
1160 wide = 1;
1161 vmap->widecount = (mapcount - i + 1);
1162 wide2uni = (cups_wide2uni_t *)
1163 calloc(vmap->widecount, sizeof(cups_wide2uni_t));
1164 if (wide2uni == NULL)
1165 {
1166 fclose(fp);
1167 cupsCharmapFlush();
1168 return (NULL);
1169 }
1170 vmap->wide2uni = wide2uni;
1171 }
1172 wide2uni->widechar = (cups_vbcs_t) legchar;
1173 wide2uni->unichar = unichar;
1174 wide2uni ++;
1175 }
1176
1177 /*
1178 * Save Unicode to legacy mapping in indirect lookup table...
1179 */
1180 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1181 if (vrow == NULL)
1182 {
1183 vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1184 if (vrow == NULL)
1185 {
1186 fclose(fp);
1187 cupsCharmapFlush();
1188 return (NULL);
1189 }
1190 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1191 }
1192 vrow += (int) (unichar & 0xff);
1193
1194 /*
1195 * Convert Replacement Character to visible replacement...
1196 */
1197 if (unichar == 0xfffd)
1198 legchar = (unsigned long) '?';
1199
1200 /*
1201 * First (oldest) legacy character uses Unicode mapping cell...
1202 */
1203 if (*vrow == 0)
1204 *vrow = (cups_vbcs_t) legchar;
1205 }
1206 vmap->charcount = (i - vmap->widecount);
1207 fclose(fp);
1208 return (vmap);
1209 }
1210
1211 /*
1212 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1213 */
1214 static int /* O - Count or -1 on error */
1215 conv_utf8_to_sbcs(char *dest, /* O - Target string */
1216 const cups_utf8_t *src, /* I - Source string */
1217 const int maxout, /* I - Max output */
1218 const cups_encoding_t encoding) /* I - Encoding */
1219 {
1220 char *start = dest; /* Start of destination string */
1221 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1222 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1223 cups_utf32_t unichar; /* Character value */
1224 int worklen; /* Internal UCS-4 string length */
1225 cups_utf32_t work[CUPS_MAX_USTRING];
1226 /* Internal UCS-4 string */
1227 int i; /* Looping variable */
1228
1229 /*
1230 * Check for valid arguments and clear output...
1231 */
1232 if ((dest == NULL)
1233 || (src == NULL)
1234 || (maxout < 1)
1235 || (maxout > CUPS_MAX_USTRING)
1236 || (encoding == CUPS_UTF8))
1237 return (-1);
1238 *dest = '\0';
1239
1240 /*
1241 * Find legacy charset map in cache...
1242 */
1243 cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1244 if (cmap == NULL)
1245 return (-1);
1246
1247 /*
1248 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1249 */
1250 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1251 if (worklen < 0)
1252 return (-1);
1253
1254 /*
1255 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1256 */
1257 for (i = 0; i < worklen;)
1258 {
1259 unichar = work[i];
1260 if (unichar == 0)
1261 break;
1262 i ++;
1263
1264 /*
1265 * Check for leading BOM (and delete from output)...
1266 */
1267 if ((i == 1) && (unichar == 0xfeff))
1268 continue;
1269
1270 /*
1271 * Convert ASCII verbatim (optimization)...
1272 */
1273 if (unichar <= 0x7f)
1274 {
1275 *dest = (char) unichar;
1276 dest ++;
1277 continue;
1278 }
1279
1280 /*
1281 * Convert unknown character to visible replacement...
1282 */
1283 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1284 if (srow)
1285 srow += (int) (unichar & 0xff);
1286 if ((srow == NULL) || (*srow == 0))
1287 *dest = '?';
1288 else
1289 *dest = (char) (*srow);
1290 dest ++;
1291 }
1292 *dest = '\0';
1293 worklen = (int) (dest - start);
1294 cupsCharmapFree(encoding);
1295 return (worklen);
1296 }
1297
1298 /*
1299 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1300 */
1301 static int /* O - Count or -1 on error */
1302 conv_utf8_to_vbcs(char *dest, /* O - Target string */
1303 const cups_utf8_t *src, /* I - Source string */
1304 const int maxout, /* I - Max output */
1305 const cups_encoding_t encoding) /* I - Encoding */
1306 {
1307 char *start = dest; /* Start of destination string */
1308 cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
1309 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1310 cups_utf32_t unichar; /* Character value */
1311 cups_vbcs_t legchar; /* Legacy character value */
1312 int worklen; /* Internal UCS-4 string length */
1313 cups_utf32_t work[CUPS_MAX_USTRING];
1314 /* Internal UCS-4 string */
1315 int i; /* Looping variable */
1316
1317 /*
1318 * Check for valid arguments and clear output...
1319 */
1320 if ((dest == NULL)
1321 || (src == NULL)
1322 || (maxout < 1)
1323 || (maxout > CUPS_MAX_USTRING)
1324 || (encoding == CUPS_UTF8))
1325 return (-1);
1326 *dest = '\0';
1327
1328 /*
1329 * Find legacy charset map in cache...
1330 */
1331 vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1332 if (vmap == NULL)
1333 return (-1);
1334
1335 /*
1336 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1337 */
1338 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1339 if (worklen < 0)
1340 return (-1);
1341
1342 /*
1343 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1344 */
1345 for (i = 0; i < worklen;)
1346 {
1347 unichar = work[i];
1348 if (unichar == 0)
1349 break;
1350 i ++;
1351
1352 /*
1353 * Check for leading BOM (and delete from output)...
1354 */
1355 if ((i == 1) && (unichar == 0xfeff))
1356 continue;
1357
1358 /*
1359 * Convert ASCII verbatim (optimization)...
1360 */
1361 if (unichar <= 0x7f)
1362 {
1363 *dest = (char) unichar;
1364 dest ++;
1365 continue;
1366 }
1367
1368 /*
1369 * Convert unknown character to visible replacement...
1370 */
1371 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1372 if (vrow)
1373 vrow += (int) (unichar & 0xff);
1374 if ((vrow == NULL) || (*vrow == 0))
1375 legchar = (cups_vbcs_t) '?';
1376 else
1377 legchar = (cups_vbcs_t) *vrow;
1378
1379 /*
1380 * Save n-byte legacy character...
1381 */
1382 if (legchar > 0xffffff)
1383 {
1384 *dest = (char) ((legchar >> 24) & 0xff);
1385 dest++;
1386 }
1387 if (legchar > 0xffff)
1388 {
1389 *dest = (char) ((legchar >> 16) & 0xff);
1390 dest++;
1391 }
1392 if (legchar > 0xff)
1393 {
1394 *dest = (char) ((legchar >> 8) & 0xff);
1395 dest++;
1396 }
1397 *dest = (char) (legchar & 0xff);
1398 dest ++;
1399 }
1400 *dest = '\0';
1401 worklen = (int) (dest - start);
1402 cupsCharmapFree(encoding);
1403 return (worklen);
1404 }
1405
1406 /*
1407 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1408 */
1409 static int /* O - Count or -1 on error */
1410 conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1411 const char *src, /* I - Source string */
1412 const int maxout, /* I - Max output */
1413 const cups_encoding_t encoding) /* I - Encoding */
1414 {
1415 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1416 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1417 unsigned long legchar; /* Legacy character value */
1418 cups_utf32_t unichar; /* Unicode character value */
1419 int worklen; /* Internal UCS-4 string length */
1420 cups_utf32_t work[CUPS_MAX_USTRING];
1421 /* Internal UCS-4 string */
1422 int i; /* Looping variable */
1423
1424 /*
1425 * Check for valid arguments and clear output...
1426 */
1427 if ((dest == NULL)
1428 || (src == NULL)
1429 || (maxout < 1)
1430 || (maxout > CUPS_MAX_USTRING)
1431 || (encoding == CUPS_UTF8))
1432 return (-1);
1433 *dest = '\0';
1434
1435 /*
1436 * Find legacy charset map in cache...
1437 */
1438 cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1439 if (cmap == NULL)
1440 return (-1);
1441
1442 /*
1443 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1444 */
1445 work[0] = 0xfeff;
1446 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1447 {
1448 if (*src == '\0')
1449 break;
1450 legchar = (unsigned long) *src;
1451
1452 /*
1453 * Convert ASCII verbatim (optimization)...
1454 */
1455 if (legchar <= 0x7f)
1456 {
1457 work[i] = (cups_utf32_t) legchar;
1458 i ++;
1459 continue;
1460 }
1461
1462 /*
1463 * Convert unknown character to Replacement Character...
1464 */
1465 crow = &cmap->char2uni[0];
1466 crow += (int) legchar;
1467 if (*crow == 0)
1468 unichar = 0xfffd;
1469 else
1470 unichar = (cups_utf32_t) *crow;
1471 work[i] = unichar;
1472 i ++;
1473 }
1474 work[i] = 0;
1475
1476 /*
1477 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1478 */
1479 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1480 cupsCharmapFree(encoding);
1481 return (worklen);
1482 }
1483
1484
1485 /*
1486 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1487 */
1488 static int /* O - Count or -1 on error */
1489 conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1490 const char *src, /* I - Source string */
1491 const int maxout, /* I - Max output */
1492 const cups_encoding_t encoding) /* I - Encoding */
1493 {
1494 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1495 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1496 cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1497 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1498 cups_vbcs_t legchar; /* Legacy character value */
1499 cups_utf32_t unichar; /* Unicode character value */
1500 int i; /* Looping variable */
1501 int worklen; /* Internal UCS-4 string length */
1502 cups_utf32_t work[CUPS_MAX_USTRING];
1503 /* Internal UCS-4 string */
1504
1505 /*
1506 * Check for valid arguments and clear output...
1507 */
1508 if ((dest == NULL)
1509 || (src == NULL)
1510 || (maxout < 1)
1511 || (maxout > CUPS_MAX_USTRING)
1512 || (encoding == CUPS_UTF8))
1513 return (-1);
1514 *dest = '\0';
1515
1516 /*
1517 * Find legacy charset map in cache...
1518 */
1519 vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1520 if (vmap == NULL)
1521 return (-1);
1522
1523 /*
1524 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1525 */
1526 work[0] = 0xfeff;
1527 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1528 {
1529 if (*src == '\0')
1530 break;
1531 legchar = (cups_vbcs_t) *src;
1532 leadchar = (cups_sbcs_t) *src;
1533
1534 /*
1535 * Convert ASCII verbatim (optimization)...
1536 */
1537 if (legchar <= 0x7f)
1538 {
1539 work[i] = (cups_utf32_t) legchar;
1540 i ++;
1541 continue;
1542 }
1543
1544 /*
1545 * Convert 2-byte legacy character...
1546 */
1547 if (vmap->lead2char[(int) leadchar] == leadchar)
1548 {
1549 src ++;
1550 if (*src == '\0')
1551 return (-1);
1552 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1553
1554 /*
1555 * Convert unknown character to Replacement Character...
1556 */
1557 crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1558 if (crow)
1559 crow += (int) (legchar & 0xff);
1560 if ((crow == NULL) || (*crow == 0))
1561 unichar = 0xfffd;
1562 else
1563 unichar = (cups_utf32_t) *crow;
1564 work[i] = unichar;
1565 i ++;
1566 continue;
1567 }
1568
1569 /*
1570 * Fetch 3-byte or 4-byte legacy character...
1571 */
1572 if (vmap->lead3char[(int) leadchar] == leadchar)
1573 {
1574 src ++;
1575 if (*src == '\0')
1576 return (-1);
1577 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1578 src ++;
1579 if (*src == '\0')
1580 return (-1);
1581 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1582 }
1583 else if (vmap->lead4char[(int) leadchar] == leadchar)
1584 {
1585 src ++;
1586 if (*src == '\0')
1587 return (-1);
1588 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1589 src ++;
1590 if (*src == '\0')
1591 return (-1);
1592 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1593 src ++;
1594 if (*src == '\0')
1595 return (-1);
1596 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1597 }
1598 else
1599 return (-1);
1600
1601 /*
1602 * Find 3-byte or 4-byte legacy character...
1603 */
1604 wide2uni = vmap->wide2uni;
1605 wide2uni = (cups_wide2uni_t *) bsearch(&legchar,
1606 vmap->wide2uni,
1607 vmap->widecount,
1608 sizeof(cups_wide2uni_t),
1609 compare_wide);
1610
1611 /*
1612 * Convert unknown character to Replacement Character...
1613 */
1614 if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1615 unichar = 0xfffd;
1616 else
1617 unichar = wide2uni->unichar;
1618 work[i] = unichar;
1619 i ++;
1620 }
1621 work[i] = 0;
1622
1623 /*
1624 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1625 */
1626 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1627 cupsCharmapFree(encoding);
1628 return (worklen);
1629 }
1630
1631 /*
1632 * 'compare_wide()' - Compare key for wide (VBCS) match.
1633 */
1634 static int
1635 compare_wide(const void *k1, /* I - Key char */
1636 const void *k2) /* I - Map char */
1637 {
1638 cups_vbcs_t *kp = (cups_vbcs_t *) k1;
1639 /* Key char pointer */
1640 cups_wide2uni_t *mp = (cups_wide2uni_t *) k2;
1641 /* Map char pointer */
1642 cups_vbcs_t key; /* Legacy key character */
1643 cups_vbcs_t map; /* Legacy map character */
1644 int result; /* Result Value */
1645
1646 key = *kp;
1647 map = mp->widechar;
1648 if (key >= map)
1649 result = (int) (key - map);
1650 else
1651 result = -1 * ((int) (map - key));
1652 return (result);
1653 }
1654
1655 /*
1656 * End of "$Id$"
1657 */