]> git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
Make CUPS API threadsafe (STR #1276), replace FILE's with
[thirdparty/cups.git] / cups / transcode.c
1 /*
2 * "$Id$"
3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2005 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
45 */
46
47 /*
48 * Include necessary headers...
49 */
50
51 #include "globals.h"
52 #include <stdlib.h>
53 #include <errno.h>
54 #include <time.h>
55
56
57 /*
58 * Prototypes...
59 */
60
61 static int get_charmap_count(const char *filename);
62 static cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
63 const char *filename);
64 static cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
65 const char *filename);
66
67 static int conv_utf8_to_sbcs(char *dest,
68 const cups_utf8_t *src,
69 const int maxout,
70 const cups_encoding_t encoding);
71 static int conv_utf8_to_vbcs(char *dest,
72 const cups_utf8_t *src,
73 const int maxout,
74 const cups_encoding_t encoding);
75
76 static int conv_sbcs_to_utf8(cups_utf8_t *dest,
77 const char *src,
78 const int maxout,
79 const cups_encoding_t encoding);
80 static int conv_vbcs_to_utf8(cups_utf8_t *dest,
81 const char *src,
82 const int maxout,
83 const cups_encoding_t encoding);
84
85 static int compare_wide(const void *k1, const void *k2);
86
87 /*
88 * 'cupsCharmapGet()' - Get a character set map.
89 *
90 * This code handles single-byte (SBCS), double-byte (DBCS), and
91 * variable-byte (VBCS) character sets _without_ charset escapes...
92 * This code does not handle multiple-byte character sets (MBCS)
93 * (such as ISO-2022-JP) with charset switching via escapes...
94 */
95 void * /* O - Charset map pointer */
96 cupsCharmapGet(const cups_encoding_t encoding)
97 /* I - Encoding */
98 {
99 char *datadir; /* CUPS_DATADIR environment variable */
100 char mapname[80]; /* Name of charset map */
101 char filename[1024]; /* Filename for charset map file */
102
103 /*
104 * Check for valid arguments...
105 */
106 if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
107 return (NULL);
108
109 /*
110 * Get the data directory and charset map name...
111 */
112 if ((datadir = getenv("CUPS_DATADIR")) == NULL)
113 datadir = CUPS_DATADIR;
114 snprintf(mapname, sizeof(mapname), "%s.txt", cupsEncodingName(encoding));
115 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
116 datadir, mapname);
117
118 /*
119 * Read charset map input file into cache...
120 */
121 if (encoding < CUPS_ENCODING_SBCS_END)
122 return (get_sbcs_charmap(encoding, filename));
123 else if (encoding < CUPS_ENCODING_VBCS_END)
124 return (get_vbcs_charmap(encoding, filename));
125 else
126 return (NULL);
127 }
128
129 /*
130 * 'cupsCharmapFree()' - Free a character set map.
131 *
132 * This does not actually free; use 'cupsCharmapFlush()' for that.
133 */
134 void
135 cupsCharmapFree(const cups_encoding_t encoding)
136 /* I - Encoding */
137 {
138 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
139 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
140 cups_globals_t *cg = _cupsGlobals();
141 /* Pointer to library globals */
142
143 /*
144 * See if we already have this SBCS charset map loaded...
145 */
146 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
147 {
148 if (cmap->encoding == encoding)
149 {
150 if (cmap->used > 0)
151 cmap->used --;
152 return;
153 }
154 }
155
156 /*
157 * See if we already have this DBCS/VBCS charset map loaded...
158 */
159 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
160 {
161 if (vmap->encoding == encoding)
162 {
163 if (vmap->used > 0)
164 vmap->used --;
165 return;
166 }
167 }
168 return;
169 }
170
171 /*
172 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
173 */
174 void
175 cupsCharmapFlush(void)
176 {
177 int i; /* Looping variable */
178 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
179 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
180 cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */
181 cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */
182 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
183 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
184 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
185 cups_globals_t *cg = _cupsGlobals();
186 /* Pointer to library globals */
187
188 /*
189 * Loop through SBCS charset map cache, free all memory...
190 */
191 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext)
192 {
193 for (i = 0; i < 256; i ++)
194 {
195 if ((srow = cmap->uni2char[i]) != NULL)
196 free(srow);
197 }
198 cnext = cmap->next;
199 free(cmap);
200 }
201 cg->cmap_cache = NULL;
202
203 /*
204 * Loop through DBCS/VBCS charset map cache, free all memory...
205 */
206 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext)
207 {
208 for (i = 0; i < 256; i ++)
209 {
210 if ((crow = vmap->char2uni[i]) != NULL)
211 free(crow);
212 }
213 for (i = 0; i < 256; i ++)
214 {
215 if ((vrow = vmap->uni2char[i]) != NULL)
216 free(vrow);
217 }
218 if (vmap->wide2uni)
219 free(vmap->wide2uni);
220 vnext = vmap->next;
221 free(vmap);
222 }
223 cg->vmap_cache = NULL;
224 return;
225 }
226
227 /*
228 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
229 *
230 * This code handles single-byte (SBCS), double-byte (DBCS), and
231 * variable-byte (VBCS) character sets _without_ charset escapes...
232 * This code does not handle multiple-byte character sets (MBCS)
233 * (such as ISO-2022-JP) with charset switching via escapes...
234 */
235 int /* O - Count or -1 on error */
236 cupsUTF8ToCharset(char *dest, /* O - Target string */
237 const cups_utf8_t *src, /* I - Source string */
238 const int maxout, /* I - Max output */
239 const cups_encoding_t encoding) /* I - Encoding */
240 {
241 /*
242 * Check for valid arguments...
243 */
244 if ((dest == NULL)
245 || (src == NULL)
246 || (maxout < 1)
247 || (maxout > CUPS_MAX_USTRING)
248 || (encoding < 0)
249 || (encoding == CUPS_UTF8)
250 || (encoding >= CUPS_ENCODING_VBCS_END))
251 return (-1);
252
253 /*
254 * Convert input UTF-8 to legacy charset...
255 */
256 if (encoding < CUPS_ENCODING_SBCS_END)
257 return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
258 else if (encoding < CUPS_ENCODING_VBCS_END)
259 return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
260 else
261 return (-1);
262 }
263
264 /*
265 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
266 *
267 * This code handles single-byte (SBCS), double-byte (DBCS), and
268 * variable-byte (VBCS) character sets _without_ charset escapes...
269 * This code does not handle multiple-byte character sets (MBCS)
270 * (such as ISO-2022-JP) with charset switching via escapes...
271 */
272 int /* O - Count or -1 on error */
273 cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */
274 const char *src, /* I - Source string */
275 const int maxout, /* I - Max output */
276 const cups_encoding_t encoding) /* I - Encoding */
277 {
278 /*
279 * Check for valid arguments...
280 */
281 if ((dest == NULL)
282 || (src == NULL)
283 || (maxout < 1)
284 || (maxout > CUPS_MAX_USTRING)
285 || (encoding < 0)
286 || (encoding == CUPS_UTF8)
287 || (encoding >= CUPS_ENCODING_VBCS_END))
288 return (-1);
289
290 /*
291 * Convert input legacy charset to UTF-8...
292 */
293 if (encoding < CUPS_ENCODING_SBCS_END)
294 return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
295 else if (encoding < CUPS_ENCODING_VBCS_END)
296 return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
297 else
298 return (-1);
299 }
300
301 /*
302 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
303 *
304 * This code does not support Unicode beyond 16-bits (Plane 0)...
305 */
306 int /* O - Count or -1 on error */
307 cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */
308 const cups_utf8_t *src, /* I - Source string */
309 const int maxout) /* I - Max output */
310 {
311 int worklen; /* Internal UCS-4 string length */
312 cups_utf32_t work[CUPS_MAX_USTRING];
313 /* Internal UCS-4 string */
314
315 /*
316 * Check for valid arguments and clear output...
317 */
318 if ((dest == NULL)
319 || (src == NULL)
320 || (maxout < 1)
321 || (maxout > CUPS_MAX_USTRING))
322 return (-1);
323 *dest = 0;
324
325 /*
326 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
327 */
328 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
329 if (worklen < 0)
330 return (-1);
331
332 /*
333 * Convert internal UCS-4 to output UTF-16...
334 */
335 worklen = cupsUTF32ToUTF16(dest, work, maxout);
336 return (worklen);
337 }
338
339 /*
340 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
341 *
342 * This code does not support Unicode beyond 16-bits (Plane 0)...
343 */
344 int /* O - Count or -1 on error */
345 cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */
346 const cups_utf16_t *src, /* I - Source string */
347 const int maxout) /* I - Max output */
348 {
349 int worklen; /* Internal UCS-4 string length */
350 cups_utf32_t work[CUPS_MAX_USTRING];
351 /* Internal UCS-4 string */
352
353 /*
354 * Check for valid arguments and clear output...
355 */
356 if ((dest == NULL)
357 || (src == NULL)
358 || (maxout < 1)
359 || (maxout > CUPS_MAX_USTRING))
360 return (-1);
361 *dest = 0;
362
363 /*
364 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
365 */
366 worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
367 if (worklen < 0)
368 return (-1);
369
370 /*
371 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
372 */
373 worklen = cupsUTF32ToUTF8(dest, work, maxout);
374 return (worklen);
375 }
376
377 /*
378 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
379 *
380 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
381 *
382 * UTF-32 char UTF-8 char(s)
383 * --------------------------------------------------
384 * 0 to 127 = 0xxxxxxx (US-ASCII)
385 * 128 to 2047 = 110xxxxx 10yyyyyy
386 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
387 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
388 *
389 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
390 * which would convert to five- or six-octet UTF-8 sequences...
391 *
392 * This code does not support Unicode beyond 16-bits (Plane 0)...
393 */
394 int /* O - Count or -1 on error */
395 cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */
396 const cups_utf8_t *src, /* I - Source string */
397 const int maxout) /* I - Max output */
398 {
399 cups_utf8_t *first = (cups_utf8_t *) src;
400 int srclen; /* Source string length */
401 int i; /* Looping variable */
402 cups_utf32_t ch; /* Character value */
403 cups_utf32_t next; /* Next character value */
404 cups_utf32_t ch32; /* UTF-32 character value */
405
406 /*
407 * Check for valid arguments and clear output...
408 */
409 if ((dest == NULL)
410 || (src == NULL)
411 || (maxout < 1)
412 || (maxout > CUPS_MAX_USTRING))
413 return (-1);
414 *dest = 0;
415
416 /*
417 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
418 */
419 *dest = 0xfeff;
420 dest ++;
421 srclen = strlen((char *) src);
422 for (i = 1; i < (maxout - 1); src ++, dest ++)
423 {
424 ch = (cups_utf32_t) *src;
425 ch &= 0xff;
426 if (ch == 0)
427 break;
428 i ++;
429
430 /*
431 * Convert UTF-8 character(s) to UTF-32 character...
432 */
433 if ((ch & 0x7f) == ch)
434 {
435 /*
436 * One-octet UTF-8 <= 127 (US-ASCII)...
437 */
438 *dest = ch;
439 }
440 else if ((ch & 0xe0) == 0xc0)
441 {
442 /*
443 * Two-octet UTF-8 <= 2047 (Latin-x)...
444 */
445 src ++;
446 next = (cups_utf32_t) *src;
447 next &= 0xff;
448 if (next == 0)
449 return (-1);
450 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
451
452 /*
453 * Check for non-shortest form (invalid UTF-8)...
454 */
455 if (ch32 <= 127)
456 return (-1);
457 *dest = ch32;
458 }
459 else if ((ch & 0xf0) == 0xe0)
460 {
461 /*
462 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
463 */
464 src ++;
465 next = (cups_utf32_t) *src;
466 next &= 0xff;
467 if (next == 0)
468 return (-1);
469 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
470 src ++;
471 next = (cups_utf32_t) *src;
472 next &= 0xff;
473 if (next == 0)
474 return (-1);
475 ch32 = ((ch32 << 6) | (next & 0x3f));
476
477 /*
478 * Check for non-shortest form (invalid UTF-8)...
479 */
480 if (ch32 <= 2047)
481 return (-1);
482 *dest = ch32;
483 }
484 else if ((ch & 0xf8) == 0xf0)
485 {
486 /*
487 * Four-octet UTF-8 to Replacement Character...
488 */
489 if (((src - first) + 3) >= srclen)
490 return (-1);
491 src += 3;
492 *dest = 0xfffd;
493 }
494 else if ((ch & 0xfc) == 0xf8)
495 {
496 /*
497 * Five-octet UTF-8 (invalid strict UTF-32)...
498 */
499 return (-1);
500 }
501 else if ((ch & 0xfe) == 0xfc)
502 {
503 /*
504 * Six-octet UTF-8 (invalid strict UTF-32)...
505 */
506 return (-1);
507 }
508 else
509 {
510 /*
511 * More than six-octet (invalid UTF-8 sequence)...
512 */
513 return (-1);
514 }
515
516 /*
517 * Check for UTF-16 surrogate (illegal UTF-8)...
518 */
519 if ((*dest >= 0xd800) && (*dest <= 0xdfff))
520 return (-1);
521
522 /*
523 * Check for beyond Plane 16 (invalid UTF-8)...
524 */
525 if (*dest > 0x10ffff)
526 return (-1);
527 }
528 *dest = 0;
529 return (i);
530 }
531
532 /*
533 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
534 *
535 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
536 *
537 * UTF-32 char UTF-8 char(s)
538 * --------------------------------------------------
539 * 0 to 127 = 0xxxxxxx (US-ASCII)
540 * 128 to 2047 = 110xxxxx 10yyyyyy
541 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
542 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
543 *
544 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
545 * which would convert to five- or six-octet UTF-8 sequences...
546 *
547 * This code does not support Unicode beyond 16-bits (Plane 0)...
548 */
549 int /* O - Count or -1 on error */
550 cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */
551 const cups_utf32_t *src, /* I - Source string */
552 const int maxout) /* I - Max output */
553 {
554 cups_utf32_t *first = (cups_utf32_t *) src;
555 /* First source char */
556 cups_utf8_t *start = dest; /* Start of destination string */
557 int i; /* Looping variable */
558 int swap = 0; /* Byte-swap input to output */
559 cups_utf32_t ch; /* Character value */
560
561 /*
562 * Check for valid arguments and clear output...
563 */
564 if ((dest == NULL)
565 || (src == NULL)
566 || (maxout < 1))
567 return (-1);
568 *dest = '\0';
569
570 /*
571 * Check for leading BOM in UTF-32 and inverted BOM...
572 */
573 if (*src == 0xfffe0000)
574 swap = 1;
575
576 /*
577 * Convert input UTF-32 to output UTF-8...
578 */
579 for (i = 0; i < (maxout - 1); src ++)
580 {
581 ch = *src;
582 if (ch == 0)
583 break;
584
585 /*
586 * Byte swap input UTF-32, if necessary...
587 */
588 if (swap)
589 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
590
591 /*
592 * Check for leading BOM (and delete from output)...
593 */
594 if ((src == first) && (ch == 0xfeff))
595 continue;
596
597 /*
598 * Check for beyond Plane 16 (invalid UTF-32)...
599 */
600 if (ch > 0x10ffff)
601 return (-1);
602
603 /*
604 * Convert beyond Plane 0 (BMP) to Replacement Character...
605 */
606 if (ch > 0xffff)
607 ch = 0xfffd;
608
609 /*
610 * Convert UTF-32 character to UTF-8 character(s)...
611 */
612 if (ch <= 0x7f)
613 {
614 /*
615 * One-octet UTF-8 <= 127 (US-ASCII)...
616 */
617 *dest = (cups_utf8_t) ch;
618 dest ++;
619 i ++;
620 }
621 else if (ch <= 0x7ff)
622 {
623 /*
624 * Two-octet UTF-8 <= 2047 (Latin-x)...
625 */
626 if (i > (maxout - 2))
627 break;
628 *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
629 dest ++;
630 i ++;
631 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
632 dest ++;
633 i ++;
634 }
635 else
636 {
637 /*
638 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
639 */
640 if (i > (maxout - 3))
641 break;
642 *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
643 dest ++;
644 i ++;
645 *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
646 dest ++;
647 i ++;
648 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
649 dest ++;
650 i ++;
651 }
652 }
653 *dest = '\0';
654 i = (int) (dest - start);
655 return (i);
656 }
657
658 /*
659 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
660 *
661 * This code does not support Unicode beyond 16-bits (Plane 0)...
662 */
663 int /* O - Count or -1 on error */
664 cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */
665 const cups_utf16_t *src, /* I - Source string */
666 const int maxout) /* I - Max output */
667 {
668 int i; /* Looping variable */
669 int swap = 0; /* Byte-swap input to output */
670 int surrogate = 0; /* Expecting low-half surrogate */
671 cups_utf32_t ch; /* Character value */
672
673 /*
674 * Check for valid arguments and clear output...
675 */
676 if ((dest == NULL)
677 || (src == NULL)
678 || (maxout < 1)
679 || (maxout > CUPS_MAX_USTRING))
680 return (-1);
681 *dest = 0;
682
683 /*
684 * Check for leading BOM in UTF-16 and inverted BOM...
685 */
686 if (*src == 0xfffe)
687 swap = 1;
688
689 /*
690 * Convert input UTF-16 to output UTF-32...
691 */
692 for (i = 0; i < (maxout - 1); src ++)
693 {
694 ch = (cups_utf32_t) (*src & 0xffff);
695 if (ch == 0)
696 break;
697 i ++;
698
699 /*
700 * Byte swap input UTF-16, if necessary...
701 */
702 if (swap)
703 ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
704
705 /*
706 * Discard expected UTF-16 low-half surrogate...
707 */
708 if ((ch >= 0xdc00) && (ch <= 0xdfff))
709 {
710 if (surrogate == 0)
711 return (-1);
712 surrogate = 0;
713 continue;
714 }
715
716 /*
717 * Convert UTF-16 high-half surrogate to Replacement Character...
718 */
719 if ((ch >= 0xd800) && (ch <= 0xdbff))
720 {
721 if (surrogate == 1)
722 return (-1);
723 surrogate = 1;
724 ch = 0xfffd;
725 }
726 *dest = ch;
727 dest ++;
728 }
729 *dest = 0;
730 return (i);
731 }
732
733 /*
734 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
735 *
736 * This code does not support Unicode beyond 16-bits (Plane 0)...
737 */
738 int /* O - Count or -1 on error */
739 cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */
740 const cups_utf32_t *src, /* I - Source string */
741 const int maxout) /* I - Max output */
742 {
743 int i; /* Looping variable */
744 int swap = 0; /* Byte-swap input to output */
745 cups_utf32_t ch; /* Character value */
746
747 /*
748 * Check for valid arguments and clear output...
749 */
750 if ((dest == NULL)
751 || (src == NULL)
752 || (maxout < 1)
753 || (maxout > CUPS_MAX_USTRING))
754 return (-1);
755 *dest = 0;
756
757 /*
758 * Check for leading BOM in UTF-32 and inverted BOM...
759 */
760 if (*src == 0xfffe0000)
761 swap = 1;
762
763 /*
764 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
765 */
766 for (i = 0; i < (maxout - 1); src ++, dest ++)
767 {
768 ch = *src;
769 if (ch == 0)
770 break;
771 i ++;
772
773 /*
774 * Byte swap input UTF-32, if necessary...
775 */
776 if (swap)
777 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
778
779 /*
780 * Check for UTF-16 surrogate (illegal UTF-32)...
781 */
782 if ((ch >= 0xd800) && (ch <= 0xdfff))
783 return (-1);
784
785 /*
786 * Check for beyond Plane 16 (invalid UTF-32)...
787 */
788 if (ch > 0x10ffff)
789 return (-1);
790
791 /*
792 * Convert beyond Plane 0 (BMP) to Replacement Character...
793 */
794 if (ch > 0xffff)
795 ch = 0xfffd;
796 *dest = (cups_utf16_t) ch;
797 }
798 *dest = 0;
799 return (i);
800 }
801
802 /*
803 * 'get_charmap_count()' - Count lines in a charmap file.
804 */
805 static int /* O - Count or -1 on error */
806 get_charmap_count(const char *filename) /* I - Charmap Filename */
807 {
808 int i; /* Looping variable */
809 cups_file_t *fp; /* Map input file pointer */
810 char *s; /* Line parsing pointer */
811 char line[256]; /* Line from input map file */
812 cups_utf32_t unichar; /* Unicode character value */
813
814 /*
815 * Open map input file...
816 */
817 if ((filename == NULL) || (*filename == '\0'))
818 return (-1);
819 fp = cupsFileOpen(filename, "r");
820 if (fp == NULL)
821 return (-1);
822
823 /*
824 * Count lines in map input file...
825 */
826 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
827 {
828 s = cupsFileGets(fp, line, sizeof(line));
829 if (s == NULL)
830 break;
831 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
832 continue;
833 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
834 s ++;
835 while ((*s == ' ') || (*s == '\t'))
836 s ++;
837 if (strncmp (s, "0x", 2) == 0)
838 s += 2;
839 if ((sscanf(s, "%lx", &unichar) != 1)
840 || (unichar > 0xffff))
841 {
842 cupsFileClose(fp);
843 return (-1);
844 }
845 i ++;
846 }
847 if (i == 0)
848 i = -1;
849
850 /*
851 * Close file and return charmap count (non-comment line count)...
852 */
853 cupsFileClose(fp);
854 return (i);
855 }
856
857 /*
858 * 'get_sbcs_charmap()' - Get SBCS Charmap.
859 */
860 static cups_cmap_t * /* O - Charmap or 0 on error */
861 get_sbcs_charmap(const cups_encoding_t encoding,
862 /* I - Charmap Encoding */
863 const char *filename) /* I - Charmap Filename */
864 {
865 int i; /* Loop variable */
866 unsigned long legchar; /* Legacy character value */
867 cups_utf32_t unichar; /* Unicode character value */
868 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
869 cups_file_t *fp; /* Charset map file pointer */
870 char *s; /* Line parsing pointer */
871 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
872 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
873 char line[256]; /* Line from charset map file */
874 cups_globals_t *cg = _cupsGlobals();
875 /* Pointer to library globals */
876
877 /*
878 * Check for valid arguments...
879 */
880 if ((encoding < 0) || (filename == NULL))
881 return (NULL);
882
883 /*
884 * See if we already have this SBCS charset map loaded...
885 */
886 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
887 {
888 if (cmap->encoding == encoding)
889 {
890 cmap->used ++;
891 return ((void *) cmap);
892 }
893 }
894
895 /*
896 * Open SBCS charset map input file...
897 */
898 fp = cupsFileOpen(filename, "r");
899 if (fp == NULL)
900 return (NULL);
901
902 /*
903 * Allocate memory for SBCS charset map and add to cache...
904 */
905 cmap = (cups_cmap_t *) calloc(1, sizeof(cups_cmap_t));
906 if (cmap == NULL)
907 {
908 cupsFileClose(fp);
909 return (NULL);
910 }
911 cmap->next = cg->cmap_cache;
912 cg->cmap_cache = cmap;
913 cmap->used ++;
914 cmap->encoding = encoding;
915
916 /*
917 * Save SBCS charset map into memory for transcoding...
918 */
919 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
920 {
921 s = cupsFileGets(fp, line, sizeof(line));
922 if (s == NULL)
923 break;
924 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
925 continue;
926 if (strncmp (s, "0x", 2) == 0)
927 s += 2;
928 if ((sscanf(s, "%lx", &legchar) != 1)
929 || (legchar > 0xff))
930 {
931 cupsFileClose(fp);
932 cupsCharmapFlush();
933 return (NULL);
934 }
935 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
936 s ++;
937 while ((*s == ' ') || (*s == '\t'))
938 s ++;
939 if (strncmp (s, "0x", 2) == 0)
940 s += 2;
941 if (sscanf(s, "%lx", &unichar) != 1)
942 {
943 cupsFileClose(fp);
944 cupsCharmapFlush();
945 return (NULL);
946 }
947 i ++;
948
949 /*
950 * Convert beyond Plane 0 (BMP) to Replacement Character...
951 */
952 if (unichar > 0xffff)
953 unichar = 0xfffd;
954
955 /*
956 * Save legacy to Unicode mapping in direct lookup table...
957 */
958 crow = &cmap->char2uni[(int) legchar];
959 *crow = (cups_ucs2_t) (unichar & 0xffff);
960
961 /*
962 * Save Unicode to legacy mapping in indirect lookup table...
963 */
964 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
965 if (srow == NULL)
966 {
967 srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
968 if (srow == NULL)
969 {
970 cupsFileClose(fp);
971 cupsCharmapFlush();
972 return (NULL);
973 }
974 cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
975 }
976 srow += (int) (unichar & 0xff);
977
978 /*
979 * Convert Replacement Character to visible replacement...
980 */
981 if (unichar == 0xfffd)
982 legchar = (unsigned long) '?';
983
984 /*
985 * First (oldest) legacy character uses Unicode mapping cell...
986 */
987 if (*srow == 0)
988 *srow = (cups_sbcs_t) legchar;
989 }
990 cupsFileClose(fp);
991 return (cmap);
992 }
993
994 /*
995 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
996 */
997 static cups_vmap_t * /* O - Charmap or 0 on error */
998 get_vbcs_charmap(const cups_encoding_t encoding,
999 /* I - Charmap Encoding */
1000 const char *filename) /* I - Charmap Filename */
1001 {
1002 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1003 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1004 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1005 cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1006 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1007 unsigned long legchar; /* Legacy character value */
1008 cups_utf32_t unichar; /* Unicode character value */
1009 int mapcount; /* Count of lines in charmap file */
1010 cups_file_t *fp; /* Charset map file pointer */
1011 char *s; /* Line parsing pointer */
1012 char line[256]; /* Line from charset map file */
1013 int i; /* Loop variable */
1014 int wide; /* 32-bit legacy char */
1015 cups_globals_t *cg = _cupsGlobals();
1016 /* Pointer to library globals */
1017
1018 /*
1019 * Check for valid arguments...
1020 */
1021 if ((encoding < 0) || (filename == NULL))
1022 return (NULL);
1023
1024 /*
1025 * See if we already have this DBCS/VBCS charset map loaded...
1026 */
1027 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
1028 {
1029 if (vmap->encoding == encoding)
1030 {
1031 vmap->used ++;
1032 return ((void *) vmap);
1033 }
1034 }
1035
1036 /*
1037 * Count lines in charmap file...
1038 */
1039 mapcount = get_charmap_count(filename);
1040 if (mapcount <= 0)
1041 return (NULL);
1042
1043 /*
1044 * Open VBCS charset map input file...
1045 */
1046 fp = cupsFileOpen(filename, "r");
1047 if (fp == NULL)
1048 return (NULL);
1049
1050 /*
1051 * Allocate memory for DBCS/VBCS charset map and add to cache...
1052 */
1053 vmap = (cups_vmap_t *) calloc(1, sizeof(cups_vmap_t));
1054 if (vmap == NULL)
1055 {
1056 cupsFileClose(fp);
1057 return (NULL);
1058 }
1059 vmap->next = cg->vmap_cache;
1060 cg->vmap_cache = vmap;
1061 vmap->used ++;
1062 vmap->encoding = encoding;
1063
1064 /*
1065 * Save DBCS/VBCS charset map into memory for transcoding...
1066 */
1067 leadchar = 0;
1068 wide2uni = NULL;
1069
1070 for (i = 0, wide = 0; i < mapcount; )
1071 {
1072 s = cupsFileGets(fp, line, sizeof(line));
1073 if (s == NULL)
1074 break;
1075 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1076 continue;
1077 if (strncmp (s, "0x", 2) == 0)
1078 s += 2;
1079 if ((sscanf(s, "%lx", &legchar) != 1)
1080 || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1081 {
1082 cupsFileClose(fp);
1083 cupsCharmapFlush();
1084 return (NULL);
1085 }
1086 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1087 s ++;
1088 while ((*s == ' ') || (*s == '\t'))
1089 s ++;
1090 if (strncmp (s, "0x", 2) == 0)
1091 s += 2;
1092 if (sscanf(s, "%lx", &unichar) != 1)
1093 {
1094 cupsFileClose(fp);
1095 cupsCharmapFlush();
1096 return (NULL);
1097 }
1098 i ++;
1099
1100 /*
1101 * Convert beyond Plane 0 (BMP) to Replacement Character...
1102 */
1103 if (unichar > 0xffff)
1104 unichar = 0xfffd;
1105
1106 /*
1107 * Save lead char of 2/3/4-byte legacy char...
1108 */
1109 if ((legchar > 0xff) && (legchar <= 0xffff))
1110 {
1111 leadchar = (cups_sbcs_t) (legchar >> 8);
1112 vmap->lead2char[leadchar] = leadchar;
1113 }
1114 if ((legchar > 0xffff) && (legchar <= 0xffffff))
1115 {
1116 leadchar = (cups_sbcs_t) (legchar >> 16);
1117 vmap->lead3char[leadchar] = leadchar;
1118 }
1119 if (legchar > 0xffffff)
1120 {
1121 leadchar = (cups_sbcs_t) (legchar >> 24);
1122 vmap->lead4char[leadchar] = leadchar;
1123 }
1124
1125 /*
1126 * Save Legacy to Unicode mapping...
1127 */
1128 if (legchar <= 0xffff)
1129 {
1130 /*
1131 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1132 */
1133 crow = vmap->char2uni[(int) leadchar];
1134 if (crow == NULL)
1135 {
1136 crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1137 if (crow == NULL)
1138 {
1139 cupsFileClose(fp);
1140 cupsCharmapFlush();
1141 return (NULL);
1142 }
1143 vmap->char2uni[(int) leadchar] = crow;
1144 }
1145 crow += (int) (legchar & 0xff);
1146 *crow = (cups_vbcs_t) unichar;
1147 }
1148 else
1149 {
1150 /*
1151 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1152 */
1153 if (wide == 0)
1154 {
1155 wide = 1;
1156 vmap->widecount = (mapcount - i + 1);
1157 wide2uni = (cups_wide2uni_t *)
1158 calloc(vmap->widecount, sizeof(cups_wide2uni_t));
1159 if (wide2uni == NULL)
1160 {
1161 cupsFileClose(fp);
1162 cupsCharmapFlush();
1163 return (NULL);
1164 }
1165 vmap->wide2uni = wide2uni;
1166 }
1167 wide2uni->widechar = (cups_vbcs_t) legchar;
1168 wide2uni->unichar = unichar;
1169 wide2uni ++;
1170 }
1171
1172 /*
1173 * Save Unicode to legacy mapping in indirect lookup table...
1174 */
1175 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1176 if (vrow == NULL)
1177 {
1178 vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1179 if (vrow == NULL)
1180 {
1181 cupsFileClose(fp);
1182 cupsCharmapFlush();
1183 return (NULL);
1184 }
1185 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1186 }
1187 vrow += (int) (unichar & 0xff);
1188
1189 /*
1190 * Convert Replacement Character to visible replacement...
1191 */
1192 if (unichar == 0xfffd)
1193 legchar = (unsigned long) '?';
1194
1195 /*
1196 * First (oldest) legacy character uses Unicode mapping cell...
1197 */
1198 if (*vrow == 0)
1199 *vrow = (cups_vbcs_t) legchar;
1200 }
1201 vmap->charcount = (i - vmap->widecount);
1202 cupsFileClose(fp);
1203 return (vmap);
1204 }
1205
1206 /*
1207 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1208 */
1209 static int /* O - Count or -1 on error */
1210 conv_utf8_to_sbcs(char *dest, /* O - Target string */
1211 const cups_utf8_t *src, /* I - Source string */
1212 const int maxout, /* I - Max output */
1213 const cups_encoding_t encoding) /* I - Encoding */
1214 {
1215 char *start = dest; /* Start of destination string */
1216 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1217 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1218 cups_utf32_t unichar; /* Character value */
1219 int worklen; /* Internal UCS-4 string length */
1220 cups_utf32_t work[CUPS_MAX_USTRING];
1221 /* Internal UCS-4 string */
1222 int i; /* Looping variable */
1223
1224 /*
1225 * Check for valid arguments and clear output...
1226 */
1227 if ((dest == NULL)
1228 || (src == NULL)
1229 || (maxout < 1)
1230 || (maxout > CUPS_MAX_USTRING)
1231 || (encoding == CUPS_UTF8))
1232 return (-1);
1233 *dest = '\0';
1234
1235 /*
1236 * Find legacy charset map in cache...
1237 */
1238 cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1239 if (cmap == NULL)
1240 return (-1);
1241
1242 /*
1243 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1244 */
1245 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1246 if (worklen < 0)
1247 return (-1);
1248
1249 /*
1250 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1251 */
1252 for (i = 0; i < worklen;)
1253 {
1254 unichar = work[i];
1255 if (unichar == 0)
1256 break;
1257 i ++;
1258
1259 /*
1260 * Check for leading BOM (and delete from output)...
1261 */
1262 if ((i == 1) && (unichar == 0xfeff))
1263 continue;
1264
1265 /*
1266 * Convert ASCII verbatim (optimization)...
1267 */
1268 if (unichar <= 0x7f)
1269 {
1270 *dest = (char) unichar;
1271 dest ++;
1272 continue;
1273 }
1274
1275 /*
1276 * Convert unknown character to visible replacement...
1277 */
1278 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1279 if (srow)
1280 srow += (int) (unichar & 0xff);
1281 if ((srow == NULL) || (*srow == 0))
1282 *dest = '?';
1283 else
1284 *dest = (char) (*srow);
1285 dest ++;
1286 }
1287 *dest = '\0';
1288 worklen = (int) (dest - start);
1289 cupsCharmapFree(encoding);
1290 return (worklen);
1291 }
1292
1293 /*
1294 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1295 */
1296 static int /* O - Count or -1 on error */
1297 conv_utf8_to_vbcs(char *dest, /* O - Target string */
1298 const cups_utf8_t *src, /* I - Source string */
1299 const int maxout, /* I - Max output */
1300 const cups_encoding_t encoding) /* I - Encoding */
1301 {
1302 char *start = dest; /* Start of destination string */
1303 cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
1304 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1305 cups_utf32_t unichar; /* Character value */
1306 cups_vbcs_t legchar; /* Legacy character value */
1307 int worklen; /* Internal UCS-4 string length */
1308 cups_utf32_t work[CUPS_MAX_USTRING];
1309 /* Internal UCS-4 string */
1310 int i; /* Looping variable */
1311
1312 /*
1313 * Check for valid arguments and clear output...
1314 */
1315 if ((dest == NULL)
1316 || (src == NULL)
1317 || (maxout < 1)
1318 || (maxout > CUPS_MAX_USTRING)
1319 || (encoding == CUPS_UTF8))
1320 return (-1);
1321 *dest = '\0';
1322
1323 /*
1324 * Find legacy charset map in cache...
1325 */
1326 vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1327 if (vmap == NULL)
1328 return (-1);
1329
1330 /*
1331 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1332 */
1333 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1334 if (worklen < 0)
1335 return (-1);
1336
1337 /*
1338 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1339 */
1340 for (i = 0; i < worklen;)
1341 {
1342 unichar = work[i];
1343 if (unichar == 0)
1344 break;
1345 i ++;
1346
1347 /*
1348 * Check for leading BOM (and delete from output)...
1349 */
1350 if ((i == 1) && (unichar == 0xfeff))
1351 continue;
1352
1353 /*
1354 * Convert ASCII verbatim (optimization)...
1355 */
1356 if (unichar <= 0x7f)
1357 {
1358 *dest = (char) unichar;
1359 dest ++;
1360 continue;
1361 }
1362
1363 /*
1364 * Convert unknown character to visible replacement...
1365 */
1366 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1367 if (vrow)
1368 vrow += (int) (unichar & 0xff);
1369 if ((vrow == NULL) || (*vrow == 0))
1370 legchar = (cups_vbcs_t) '?';
1371 else
1372 legchar = (cups_vbcs_t) *vrow;
1373
1374 /*
1375 * Save n-byte legacy character...
1376 */
1377 if (legchar > 0xffffff)
1378 {
1379 *dest = (char) ((legchar >> 24) & 0xff);
1380 dest++;
1381 }
1382 if (legchar > 0xffff)
1383 {
1384 *dest = (char) ((legchar >> 16) & 0xff);
1385 dest++;
1386 }
1387 if (legchar > 0xff)
1388 {
1389 *dest = (char) ((legchar >> 8) & 0xff);
1390 dest++;
1391 }
1392 *dest = (char) (legchar & 0xff);
1393 dest ++;
1394 }
1395 *dest = '\0';
1396 worklen = (int) (dest - start);
1397 cupsCharmapFree(encoding);
1398 return (worklen);
1399 }
1400
1401 /*
1402 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1403 */
1404 static int /* O - Count or -1 on error */
1405 conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1406 const char *src, /* I - Source string */
1407 const int maxout, /* I - Max output */
1408 const cups_encoding_t encoding) /* I - Encoding */
1409 {
1410 cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1411 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1412 unsigned long legchar; /* Legacy character value */
1413 cups_utf32_t unichar; /* Unicode character value */
1414 int worklen; /* Internal UCS-4 string length */
1415 cups_utf32_t work[CUPS_MAX_USTRING];
1416 /* Internal UCS-4 string */
1417 int i; /* Looping variable */
1418
1419 /*
1420 * Check for valid arguments and clear output...
1421 */
1422 if ((dest == NULL)
1423 || (src == NULL)
1424 || (maxout < 1)
1425 || (maxout > CUPS_MAX_USTRING)
1426 || (encoding == CUPS_UTF8))
1427 return (-1);
1428 *dest = '\0';
1429
1430 /*
1431 * Find legacy charset map in cache...
1432 */
1433 cmap = (cups_cmap_t *) cupsCharmapGet(encoding);
1434 if (cmap == NULL)
1435 return (-1);
1436
1437 /*
1438 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1439 */
1440 work[0] = 0xfeff;
1441 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1442 {
1443 if (*src == '\0')
1444 break;
1445 legchar = (unsigned long) *src;
1446
1447 /*
1448 * Convert ASCII verbatim (optimization)...
1449 */
1450 if (legchar <= 0x7f)
1451 {
1452 work[i] = (cups_utf32_t) legchar;
1453 i ++;
1454 continue;
1455 }
1456
1457 /*
1458 * Convert unknown character to Replacement Character...
1459 */
1460 crow = &cmap->char2uni[0];
1461 crow += (int) legchar;
1462 if (*crow == 0)
1463 unichar = 0xfffd;
1464 else
1465 unichar = (cups_utf32_t) *crow;
1466 work[i] = unichar;
1467 i ++;
1468 }
1469 work[i] = 0;
1470
1471 /*
1472 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1473 */
1474 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1475 cupsCharmapFree(encoding);
1476 return (worklen);
1477 }
1478
1479
1480 /*
1481 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1482 */
1483 static int /* O - Count or -1 on error */
1484 conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1485 const char *src, /* I - Source string */
1486 const int maxout, /* I - Max output */
1487 const cups_encoding_t encoding) /* I - Encoding */
1488 {
1489 cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1490 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1491 cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1492 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1493 cups_vbcs_t legchar; /* Legacy character value */
1494 cups_utf32_t unichar; /* Unicode character value */
1495 int i; /* Looping variable */
1496 int worklen; /* Internal UCS-4 string length */
1497 cups_utf32_t work[CUPS_MAX_USTRING];
1498 /* Internal UCS-4 string */
1499
1500 /*
1501 * Check for valid arguments and clear output...
1502 */
1503 if ((dest == NULL)
1504 || (src == NULL)
1505 || (maxout < 1)
1506 || (maxout > CUPS_MAX_USTRING)
1507 || (encoding == CUPS_UTF8))
1508 return (-1);
1509 *dest = '\0';
1510
1511 /*
1512 * Find legacy charset map in cache...
1513 */
1514 vmap = (cups_vmap_t *) cupsCharmapGet(encoding);
1515 if (vmap == NULL)
1516 return (-1);
1517
1518 /*
1519 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1520 */
1521 work[0] = 0xfeff;
1522 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1523 {
1524 if (*src == '\0')
1525 break;
1526 legchar = (cups_vbcs_t) *src;
1527 leadchar = (cups_sbcs_t) *src;
1528
1529 /*
1530 * Convert ASCII verbatim (optimization)...
1531 */
1532 if (legchar <= 0x7f)
1533 {
1534 work[i] = (cups_utf32_t) legchar;
1535 i ++;
1536 continue;
1537 }
1538
1539 /*
1540 * Convert 2-byte legacy character...
1541 */
1542 if (vmap->lead2char[(int) leadchar] == leadchar)
1543 {
1544 src ++;
1545 if (*src == '\0')
1546 return (-1);
1547 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1548
1549 /*
1550 * Convert unknown character to Replacement Character...
1551 */
1552 crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1553 if (crow)
1554 crow += (int) (legchar & 0xff);
1555 if ((crow == NULL) || (*crow == 0))
1556 unichar = 0xfffd;
1557 else
1558 unichar = (cups_utf32_t) *crow;
1559 work[i] = unichar;
1560 i ++;
1561 continue;
1562 }
1563
1564 /*
1565 * Fetch 3-byte or 4-byte legacy character...
1566 */
1567 if (vmap->lead3char[(int) leadchar] == leadchar)
1568 {
1569 src ++;
1570 if (*src == '\0')
1571 return (-1);
1572 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1573 src ++;
1574 if (*src == '\0')
1575 return (-1);
1576 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1577 }
1578 else if (vmap->lead4char[(int) leadchar] == leadchar)
1579 {
1580 src ++;
1581 if (*src == '\0')
1582 return (-1);
1583 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1584 src ++;
1585 if (*src == '\0')
1586 return (-1);
1587 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1588 src ++;
1589 if (*src == '\0')
1590 return (-1);
1591 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1592 }
1593 else
1594 return (-1);
1595
1596 /*
1597 * Find 3-byte or 4-byte legacy character...
1598 */
1599 wide2uni = vmap->wide2uni;
1600 wide2uni = (cups_wide2uni_t *) bsearch(&legchar,
1601 vmap->wide2uni,
1602 vmap->widecount,
1603 sizeof(cups_wide2uni_t),
1604 compare_wide);
1605
1606 /*
1607 * Convert unknown character to Replacement Character...
1608 */
1609 if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1610 unichar = 0xfffd;
1611 else
1612 unichar = wide2uni->unichar;
1613 work[i] = unichar;
1614 i ++;
1615 }
1616 work[i] = 0;
1617
1618 /*
1619 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1620 */
1621 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1622 cupsCharmapFree(encoding);
1623 return (worklen);
1624 }
1625
1626 /*
1627 * 'compare_wide()' - Compare key for wide (VBCS) match.
1628 */
1629 static int
1630 compare_wide(const void *k1, /* I - Key char */
1631 const void *k2) /* I - Map char */
1632 {
1633 cups_vbcs_t *kp = (cups_vbcs_t *) k1;
1634 /* Key char pointer */
1635 cups_wide2uni_t *mp = (cups_wide2uni_t *) k2;
1636 /* Map char pointer */
1637 cups_vbcs_t key; /* Legacy key character */
1638 cups_vbcs_t map; /* Legacy map character */
1639 int result; /* Result Value */
1640
1641 key = *kp;
1642 map = mp->widechar;
1643 if (key >= map)
1644 result = (int) (key - map);
1645 else
1646 result = -1 * ((int) (map - key));
1647 return (result);
1648 }
1649
1650 /*
1651 * End of "$Id$"
1652 */