]> git.ipfire.org Git - thirdparty/cups.git/blame - cups/transcode.c
Load cups into easysw/current.
[thirdparty/cups.git] / cups / transcode.c
CommitLineData
ef416fc2 1/*
fa73b229 2 * "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $"
ef416fc2 3 *
4 * Transcoding support for the Common UNIX Printing System (CUPS).
5 *
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
13 * Products at:
14 *
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
19 *
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
23 *
24 * Contents:
25 *
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
fa73b229 29 * _cupsCharmapFlush() - Flush all character set maps out of cache.
ef416fc2 30 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
31 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
32 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
33 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
34 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
35 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
36 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
37 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
38 * get_charmap_count() - Count lines in a charmap file.
39 * get_sbcs_charmap() - Get SBCS Charmap.
40 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
41 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
42 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
43 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
44 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
45 * compare_wide() - Compare key for wide (VBCS) match.
46 */
47
48/*
49 * Include necessary headers...
50 */
51
52#include "globals.h"
53#include <stdlib.h>
54#include <errno.h>
55#include <time.h>
56
57
58/*
59 * Prototypes...
60 */
61
62static int get_charmap_count(const char *filename);
63static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
64 const char *filename);
65static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
66 const char *filename);
67
68static int conv_utf8_to_sbcs(char *dest,
69 const cups_utf8_t *src,
70 const int maxout,
71 const cups_encoding_t encoding);
72static int conv_utf8_to_vbcs(char *dest,
73 const cups_utf8_t *src,
74 const int maxout,
75 const cups_encoding_t encoding);
76
77static int conv_sbcs_to_utf8(cups_utf8_t *dest,
78 const char *src,
79 const int maxout,
80 const cups_encoding_t encoding);
81static int conv_vbcs_to_utf8(cups_utf8_t *dest,
82 const char *src,
83 const int maxout,
84 const cups_encoding_t encoding);
85
86static int compare_wide(const void *k1, const void *k2);
87
88/*
89 * 'cupsCharmapGet()' - Get a character set map.
90 *
91 * This code handles single-byte (SBCS), double-byte (DBCS), and
92 * variable-byte (VBCS) character sets _without_ charset escapes...
93 * This code does not handle multiple-byte character sets (MBCS)
94 * (such as ISO-2022-JP) with charset switching via escapes...
95 */
96
97void * /* O - Charset map pointer */
98cupsCharmapGet(
99 const cups_encoding_t encoding) /* I - Encoding */
100{
101 char mapname[80]; /* Name of charset map */
102 char filename[1024]; /* Filename for charset map file */
103 _cups_globals_t *cg = _cupsGlobals(); /* Global data */
104
105
106 /*
107 * Check for valid arguments...
108 */
109
110 if ((encoding < 0) || (encoding >= CUPS_ENCODING_VBCS_END))
111 return (NULL);
112
113 /*
114 * Get the data directory and charset map name...
115 */
116
117 snprintf(mapname, sizeof(mapname), "%s.txt", _cupsEncodingName(encoding));
118 snprintf(filename, sizeof(filename), "%s/charmaps/%s",
119 cg->cups_datadir, mapname);
120
121 /*
122 * Read charset map input file into cache...
123 */
124
125 if (encoding < CUPS_ENCODING_SBCS_END)
126 return (get_sbcs_charmap(encoding, filename));
127 else if (encoding < CUPS_ENCODING_VBCS_END)
128 return (get_vbcs_charmap(encoding, filename));
129 else
130 return (NULL);
131}
132
133/*
134 * 'cupsCharmapFree()' - Free a character set map.
135 *
136 * This does not actually free; use 'cupsCharmapFlush()' for that.
137 */
138void
139cupsCharmapFree(const cups_encoding_t encoding)
140 /* I - Encoding */
141{
142 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
143 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
144 _cups_globals_t *cg = _cupsGlobals();
145 /* Pointer to library globals */
146
147 /*
148 * See if we already have this SBCS charset map loaded...
149 */
150 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
151 {
152 if (cmap->encoding == encoding)
153 {
154 if (cmap->used > 0)
155 cmap->used --;
156 return;
157 }
158 }
159
160 /*
161 * See if we already have this DBCS/VBCS charset map loaded...
162 */
163 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
164 {
165 if (vmap->encoding == encoding)
166 {
167 if (vmap->used > 0)
168 vmap->used --;
169 return;
170 }
171 }
172 return;
173}
174
fa73b229 175
ef416fc2 176/*
177 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
178 */
179void
180cupsCharmapFlush(void)
181{
fa73b229 182 _cupsCharmapFlush(_cupsGlobals());
183}
184
185
186/*
187 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
188 */
189
190void
191_cupsCharmapFlush(_cups_globals_t *cg) /* I - Global data */
192{
193 int i; /* Looping variable */
194 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
195 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
196 _cups_cmap_t *cnext; /* Next Legacy SBCS Charset Map */
197 _cups_vmap_t *vnext; /* Next Legacy VBCS Charset Map */
198 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
199 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
200 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
201
ef416fc2 202
203 /*
204 * Loop through SBCS charset map cache, free all memory...
205 */
206 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cnext)
207 {
208 for (i = 0; i < 256; i ++)
209 {
210 if ((srow = cmap->uni2char[i]) != NULL)
211 free(srow);
212 }
213 cnext = cmap->next;
214 free(cmap);
215 }
216 cg->cmap_cache = NULL;
217
218 /*
219 * Loop through DBCS/VBCS charset map cache, free all memory...
220 */
221 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vnext)
222 {
223 for (i = 0; i < 256; i ++)
224 {
225 if ((crow = vmap->char2uni[i]) != NULL)
226 free(crow);
227 }
228 for (i = 0; i < 256; i ++)
229 {
230 if ((vrow = vmap->uni2char[i]) != NULL)
231 free(vrow);
232 }
233 if (vmap->wide2uni)
234 free(vmap->wide2uni);
235 vnext = vmap->next;
236 free(vmap);
237 }
238 cg->vmap_cache = NULL;
239 return;
240}
241
242/*
243 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
244 *
245 * This code handles single-byte (SBCS), double-byte (DBCS), and
246 * variable-byte (VBCS) character sets _without_ charset escapes...
247 * This code does not handle multiple-byte character sets (MBCS)
248 * (such as ISO-2022-JP) with charset switching via escapes...
249 */
250int /* O - Count or -1 on error */
251cupsUTF8ToCharset(char *dest, /* O - Target string */
252 const cups_utf8_t *src, /* I - Source string */
253 const int maxout, /* I - Max output */
254 const cups_encoding_t encoding) /* I - Encoding */
255{
256 /*
257 * Check for valid arguments...
258 */
259
260 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
261 return (-1);
262
263 /*
264 * Handle identity conversions...
265 */
266
267 if (encoding == CUPS_UTF8 ||
268 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
269 {
270 strlcpy(dest, (char *)src, maxout);
271 return (strlen(dest));
272 }
273
274 /*
275 * Convert input UTF-8 to legacy charset...
276 */
277 if (encoding < CUPS_ENCODING_SBCS_END)
278 return (conv_utf8_to_sbcs(dest, src, maxout, encoding));
279 else if (encoding < CUPS_ENCODING_VBCS_END)
280 return (conv_utf8_to_vbcs(dest, src, maxout, encoding));
281 else
282 return (-1);
283}
284
285/*
286 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
287 *
288 * This code handles single-byte (SBCS), double-byte (DBCS), and
289 * variable-byte (VBCS) character sets _without_ charset escapes...
290 * This code does not handle multiple-byte character sets (MBCS)
291 * (such as ISO-2022-JP) with charset switching via escapes...
292 */
293int /* O - Count or -1 on error */
294cupsCharsetToUTF8(cups_utf8_t *dest, /* O - Target string */
295 const char *src, /* I - Source string */
296 const int maxout, /* I - Max output */
297 const cups_encoding_t encoding) /* I - Encoding */
298{
299 /*
300 * Check for valid arguments...
301 */
302
303 if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
304 return (-1);
305
306 /*
307 * Handle identity conversions...
308 */
309
310 if (encoding == CUPS_UTF8 ||
311 encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
312 {
313 strlcpy((char *)dest, src, maxout);
314 return (strlen((char *)dest));
315 }
316
317 /*
318 * Convert input legacy charset to UTF-8...
319 */
320 if (encoding < CUPS_ENCODING_SBCS_END)
321 return (conv_sbcs_to_utf8(dest, src, maxout, encoding));
322 else if (encoding < CUPS_ENCODING_VBCS_END)
323 return (conv_vbcs_to_utf8(dest, src, maxout, encoding));
324 else
325 return (-1);
326}
327
328/*
329 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
330 *
331 * This code does not support Unicode beyond 16-bits (Plane 0)...
332 */
333int /* O - Count or -1 on error */
334cupsUTF8ToUTF16(cups_utf16_t *dest, /* O - Target string */
335 const cups_utf8_t *src, /* I - Source string */
336 const int maxout) /* I - Max output */
337{
338 int worklen; /* Internal UCS-4 string length */
339 cups_utf32_t work[CUPS_MAX_USTRING];
340 /* Internal UCS-4 string */
341
342 /*
343 * Check for valid arguments and clear output...
344 */
345 if ((dest == NULL)
346 || (src == NULL)
347 || (maxout < 1)
348 || (maxout > CUPS_MAX_USTRING))
349 return (-1);
350 *dest = 0;
351
352 /*
353 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
354 */
355 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
356 if (worklen < 0)
357 return (-1);
358
359 /*
360 * Convert internal UCS-4 to output UTF-16...
361 */
362 worklen = cupsUTF32ToUTF16(dest, work, maxout);
363 return (worklen);
364}
365
366/*
367 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
368 *
369 * This code does not support Unicode beyond 16-bits (Plane 0)...
370 */
371int /* O - Count or -1 on error */
372cupsUTF16ToUTF8(cups_utf8_t *dest, /* O - Target string */
373 const cups_utf16_t *src, /* I - Source string */
374 const int maxout) /* I - Max output */
375{
376 int worklen; /* Internal UCS-4 string length */
377 cups_utf32_t work[CUPS_MAX_USTRING];
378 /* Internal UCS-4 string */
379
380 /*
381 * Check for valid arguments and clear output...
382 */
383 if ((dest == NULL)
384 || (src == NULL)
385 || (maxout < 1)
386 || (maxout > CUPS_MAX_USTRING))
387 return (-1);
388 *dest = 0;
389
390 /*
391 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
392 */
393 worklen = cupsUTF16ToUTF32(work, src, CUPS_MAX_USTRING);
394 if (worklen < 0)
395 return (-1);
396
397 /*
398 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
399 */
400 worklen = cupsUTF32ToUTF8(dest, work, maxout);
401 return (worklen);
402}
403
404/*
405 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
406 *
407 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
408 *
409 * UTF-32 char UTF-8 char(s)
410 * --------------------------------------------------
411 * 0 to 127 = 0xxxxxxx (US-ASCII)
412 * 128 to 2047 = 110xxxxx 10yyyyyy
413 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
414 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
415 *
416 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
417 * which would convert to five- or six-octet UTF-8 sequences...
418 *
419 * This code does not support Unicode beyond 16-bits (Plane 0)...
420 */
421int /* O - Count or -1 on error */
422cupsUTF8ToUTF32(cups_utf32_t *dest, /* O - Target string */
423 const cups_utf8_t *src, /* I - Source string */
424 const int maxout) /* I - Max output */
425{
426 cups_utf8_t *first = (cups_utf8_t *) src;
427 size_t srclen; /* Source string length */
428 int i; /* Looping variable */
429 cups_utf32_t ch; /* Character value */
430 cups_utf32_t next; /* Next character value */
431 cups_utf32_t ch32; /* UTF-32 character value */
432
433 /*
434 * Check for valid arguments and clear output...
435 */
436 if ((dest == NULL)
437 || (src == NULL)
438 || (maxout < 1)
439 || (maxout > CUPS_MAX_USTRING))
440 return (-1);
441 *dest = 0;
442
443 /*
444 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
445 */
446 *dest = 0xfeff;
447 dest ++;
448 srclen = strlen((char *) src);
449 for (i = 1; i < (maxout - 1); src ++, dest ++)
450 {
451 ch = (cups_utf32_t) *src;
452 ch &= 0xff;
453 if (ch == 0)
454 break;
455 i ++;
456
457 /*
458 * Convert UTF-8 character(s) to UTF-32 character...
459 */
460 if ((ch & 0x7f) == ch)
461 {
462 /*
463 * One-octet UTF-8 <= 127 (US-ASCII)...
464 */
465 *dest = ch;
466 }
467 else if ((ch & 0xe0) == 0xc0)
468 {
469 /*
470 * Two-octet UTF-8 <= 2047 (Latin-x)...
471 */
472 src ++;
473 next = (cups_utf32_t) *src;
474 next &= 0xff;
475 if (next == 0)
476 return (-1);
477 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
478
479 /*
480 * Check for non-shortest form (invalid UTF-8)...
481 */
482 if (ch32 <= 127)
483 return (-1);
484 *dest = ch32;
485 }
486 else if ((ch & 0xf0) == 0xe0)
487 {
488 /*
489 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
490 */
491 src ++;
492 next = (cups_utf32_t) *src;
493 next &= 0xff;
494 if (next == 0)
495 return (-1);
496 ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
497 src ++;
498 next = (cups_utf32_t) *src;
499 next &= 0xff;
500 if (next == 0)
501 return (-1);
502 ch32 = ((ch32 << 6) | (next & 0x3f));
503
504 /*
505 * Check for non-shortest form (invalid UTF-8)...
506 */
507 if (ch32 <= 2047)
508 return (-1);
509 *dest = ch32;
510 }
511 else if ((ch & 0xf8) == 0xf0)
512 {
513 /*
514 * Four-octet UTF-8 to Replacement Character...
515 */
516 if (((src - first) + 3) >= srclen)
517 return (-1);
518 src += 3;
519 *dest = 0xfffd;
520 }
521 else if ((ch & 0xfc) == 0xf8)
522 {
523 /*
524 * Five-octet UTF-8 (invalid strict UTF-32)...
525 */
526 return (-1);
527 }
528 else if ((ch & 0xfe) == 0xfc)
529 {
530 /*
531 * Six-octet UTF-8 (invalid strict UTF-32)...
532 */
533 return (-1);
534 }
535 else
536 {
537 /*
538 * More than six-octet (invalid UTF-8 sequence)...
539 */
540 return (-1);
541 }
542
543 /*
544 * Check for UTF-16 surrogate (illegal UTF-8)...
545 */
546 if ((*dest >= 0xd800) && (*dest <= 0xdfff))
547 return (-1);
548
549 /*
550 * Check for beyond Plane 16 (invalid UTF-8)...
551 */
552 if (*dest > 0x10ffff)
553 return (-1);
554 }
555 *dest = 0;
556 return (i);
557}
558
559/*
560 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
561 *
562 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
563 *
564 * UTF-32 char UTF-8 char(s)
565 * --------------------------------------------------
566 * 0 to 127 = 0xxxxxxx (US-ASCII)
567 * 128 to 2047 = 110xxxxx 10yyyyyy
568 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
569 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
570 *
571 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
572 * which would convert to five- or six-octet UTF-8 sequences...
573 *
574 * This code does not support Unicode beyond 16-bits (Plane 0)...
575 */
576int /* O - Count or -1 on error */
577cupsUTF32ToUTF8(cups_utf8_t *dest, /* O - Target string */
578 const cups_utf32_t *src, /* I - Source string */
579 const int maxout) /* I - Max output */
580{
581 cups_utf32_t *first = (cups_utf32_t *) src;
582 /* First source char */
583 cups_utf8_t *start = dest; /* Start of destination string */
584 int i; /* Looping variable */
585 int swap = 0; /* Byte-swap input to output */
586 cups_utf32_t ch; /* Character value */
587
588 /*
589 * Check for valid arguments and clear output...
590 */
591 if ((dest == NULL)
592 || (src == NULL)
593 || (maxout < 1))
594 return (-1);
595 *dest = '\0';
596
597 /*
598 * Check for leading BOM in UTF-32 and inverted BOM...
599 */
600 if (*src == 0xfffe0000)
601 swap = 1;
602
603 /*
604 * Convert input UTF-32 to output UTF-8...
605 */
606 for (i = 0; i < (maxout - 1); src ++)
607 {
608 ch = *src;
609 if (ch == 0)
610 break;
611
612 /*
613 * Byte swap input UTF-32, if necessary...
614 */
615 if (swap)
616 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
617
618 /*
619 * Check for leading BOM (and delete from output)...
620 */
621 if ((src == first) && (ch == 0xfeff))
622 continue;
623
624 /*
625 * Check for beyond Plane 16 (invalid UTF-32)...
626 */
627 if (ch > 0x10ffff)
628 return (-1);
629
630 /*
631 * Convert beyond Plane 0 (BMP) to Replacement Character...
632 */
633 if (ch > 0xffff)
634 ch = 0xfffd;
635
636 /*
637 * Convert UTF-32 character to UTF-8 character(s)...
638 */
639 if (ch <= 0x7f)
640 {
641 /*
642 * One-octet UTF-8 <= 127 (US-ASCII)...
643 */
644 *dest = (cups_utf8_t) ch;
645 dest ++;
646 i ++;
647 }
648 else if (ch <= 0x7ff)
649 {
650 /*
651 * Two-octet UTF-8 <= 2047 (Latin-x)...
652 */
653 if (i > (maxout - 2))
654 break;
655 *dest = (cups_utf8_t) (0xc0 | ((ch >> 6) & 0x1f));
656 dest ++;
657 i ++;
658 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
659 dest ++;
660 i ++;
661 }
662 else
663 {
664 /*
665 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
666 */
667 if (i > (maxout - 3))
668 break;
669 *dest = (cups_utf8_t) (0xe0 | ((ch >> 12) & 0x0f));
670 dest ++;
671 i ++;
672 *dest = (cups_utf8_t) (0x80 | ((ch >> 6) & 0x3f));
673 dest ++;
674 i ++;
675 *dest = (cups_utf8_t) (0x80 | (ch & 0x3f));
676 dest ++;
677 i ++;
678 }
679 }
680 *dest = '\0';
681 i = (int) (dest - start);
682 return (i);
683}
684
685/*
686 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
687 *
688 * This code does not support Unicode beyond 16-bits (Plane 0)...
689 */
690int /* O - Count or -1 on error */
691cupsUTF16ToUTF32(cups_utf32_t *dest, /* O - Target string */
692 const cups_utf16_t *src, /* I - Source string */
693 const int maxout) /* I - Max output */
694{
695 int i; /* Looping variable */
696 int swap = 0; /* Byte-swap input to output */
697 int surrogate = 0; /* Expecting low-half surrogate */
698 cups_utf32_t ch; /* Character value */
699
700 /*
701 * Check for valid arguments and clear output...
702 */
703 if ((dest == NULL)
704 || (src == NULL)
705 || (maxout < 1)
706 || (maxout > CUPS_MAX_USTRING))
707 return (-1);
708 *dest = 0;
709
710 /*
711 * Check for leading BOM in UTF-16 and inverted BOM...
712 */
713 if (*src == 0xfffe)
714 swap = 1;
715
716 /*
717 * Convert input UTF-16 to output UTF-32...
718 */
719 for (i = 0; i < (maxout - 1); src ++)
720 {
721 ch = (cups_utf32_t) (*src & 0xffff);
722 if (ch == 0)
723 break;
724 i ++;
725
726 /*
727 * Byte swap input UTF-16, if necessary...
728 */
729 if (swap)
730 ch = (cups_utf32_t) ((ch << 8) | (ch >> 8));
731
732 /*
733 * Discard expected UTF-16 low-half surrogate...
734 */
735 if ((ch >= 0xdc00) && (ch <= 0xdfff))
736 {
737 if (surrogate == 0)
738 return (-1);
739 surrogate = 0;
740 continue;
741 }
742
743 /*
744 * Convert UTF-16 high-half surrogate to Replacement Character...
745 */
746 if ((ch >= 0xd800) && (ch <= 0xdbff))
747 {
748 if (surrogate == 1)
749 return (-1);
750 surrogate = 1;
751 ch = 0xfffd;
752 }
753 *dest = ch;
754 dest ++;
755 }
756 *dest = 0;
757 return (i);
758}
759
760/*
761 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
762 *
763 * This code does not support Unicode beyond 16-bits (Plane 0)...
764 */
765int /* O - Count or -1 on error */
766cupsUTF32ToUTF16(cups_utf16_t *dest, /* O - Target string */
767 const cups_utf32_t *src, /* I - Source string */
768 const int maxout) /* I - Max output */
769{
770 int i; /* Looping variable */
771 int swap = 0; /* Byte-swap input to output */
772 cups_utf32_t ch; /* Character value */
773
774 /*
775 * Check for valid arguments and clear output...
776 */
777 if ((dest == NULL)
778 || (src == NULL)
779 || (maxout < 1)
780 || (maxout > CUPS_MAX_USTRING))
781 return (-1);
782 *dest = 0;
783
784 /*
785 * Check for leading BOM in UTF-32 and inverted BOM...
786 */
787 if (*src == 0xfffe0000)
788 swap = 1;
789
790 /*
791 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
792 */
793 for (i = 0; i < (maxout - 1); src ++, dest ++)
794 {
795 ch = *src;
796 if (ch == 0)
797 break;
798 i ++;
799
800 /*
801 * Byte swap input UTF-32, if necessary...
802 */
803 if (swap)
804 ch = ((ch >> 24) | ((ch >> 8) & 0xff00) | ((ch << 8) & 0xff0000));
805
806 /*
807 * Check for UTF-16 surrogate (illegal UTF-32)...
808 */
809 if ((ch >= 0xd800) && (ch <= 0xdfff))
810 return (-1);
811
812 /*
813 * Check for beyond Plane 16 (invalid UTF-32)...
814 */
815 if (ch > 0x10ffff)
816 return (-1);
817
818 /*
819 * Convert beyond Plane 0 (BMP) to Replacement Character...
820 */
821 if (ch > 0xffff)
822 ch = 0xfffd;
823 *dest = (cups_utf16_t) ch;
824 }
825 *dest = 0;
826 return (i);
827}
828
829/*
830 * 'get_charmap_count()' - Count lines in a charmap file.
831 */
832static int /* O - Count or -1 on error */
833get_charmap_count(const char *filename) /* I - Charmap Filename */
834{
835 int i; /* Looping variable */
836 cups_file_t *fp; /* Map input file pointer */
837 char *s; /* Line parsing pointer */
838 char line[256]; /* Line from input map file */
839 cups_utf32_t unichar; /* Unicode character value */
840
841 /*
842 * Open map input file...
843 */
844 if ((filename == NULL) || (*filename == '\0'))
845 return (-1);
846 fp = cupsFileOpen(filename, "r");
847 if (fp == NULL)
848 return (-1);
849
850 /*
851 * Count lines in map input file...
852 */
853 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
854 {
855 s = cupsFileGets(fp, line, sizeof(line));
856 if (s == NULL)
857 break;
858 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
859 continue;
860 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
861 s ++;
862 while ((*s == ' ') || (*s == '\t'))
863 s ++;
864 if (strncmp (s, "0x", 2) == 0)
865 s += 2;
866 if ((sscanf(s, "%lx", &unichar) != 1)
867 || (unichar > 0xffff))
868 {
869 cupsFileClose(fp);
870 return (-1);
871 }
872 i ++;
873 }
874 if (i == 0)
875 i = -1;
876
877 /*
878 * Close file and return charmap count (non-comment line count)...
879 */
880 cupsFileClose(fp);
881 return (i);
882}
883
884/*
885 * 'get_sbcs_charmap()' - Get SBCS Charmap.
886 */
887static _cups_cmap_t * /* O - Charmap or 0 on error */
888get_sbcs_charmap(const cups_encoding_t encoding,
889 /* I - Charmap Encoding */
890 const char *filename) /* I - Charmap Filename */
891{
892 int i; /* Loop variable */
893 unsigned long legchar; /* Legacy character value */
894 cups_utf32_t unichar; /* Unicode character value */
895 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
896 cups_file_t *fp; /* Charset map file pointer */
897 char *s; /* Line parsing pointer */
898 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
899 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
900 char line[256]; /* Line from charset map file */
901 _cups_globals_t *cg = _cupsGlobals();
902 /* Pointer to library globals */
903
904 /*
905 * Check for valid arguments...
906 */
907 if ((encoding < 0) || (filename == NULL))
908 return (NULL);
909
910 /*
911 * See if we already have this SBCS charset map loaded...
912 */
913 for (cmap = cg->cmap_cache; cmap != NULL; cmap = cmap->next)
914 {
915 if (cmap->encoding == encoding)
916 {
917 cmap->used ++;
918 return ((void *) cmap);
919 }
920 }
921
922 /*
923 * Open SBCS charset map input file...
924 */
925 fp = cupsFileOpen(filename, "r");
926 if (fp == NULL)
927 return (NULL);
928
929 /*
930 * Allocate memory for SBCS charset map and add to cache...
931 */
932 cmap = (_cups_cmap_t *) calloc(1, sizeof(_cups_cmap_t));
933 if (cmap == NULL)
934 {
935 cupsFileClose(fp);
936 return (NULL);
937 }
938 cmap->next = cg->cmap_cache;
939 cg->cmap_cache = cmap;
940 cmap->used ++;
941 cmap->encoding = encoding;
942
943 /*
944 * Save SBCS charset map into memory for transcoding...
945 */
946 for (i = 0; i < CUPS_MAX_CHARMAP_LINES;)
947 {
948 s = cupsFileGets(fp, line, sizeof(line));
949 if (s == NULL)
950 break;
951 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
952 continue;
953 if (strncmp (s, "0x", 2) == 0)
954 s += 2;
955 if ((sscanf(s, "%lx", &legchar) != 1)
956 || (legchar > 0xff))
957 {
958 cupsFileClose(fp);
959 cupsCharmapFlush();
960 return (NULL);
961 }
962 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
963 s ++;
964 while ((*s == ' ') || (*s == '\t'))
965 s ++;
966 if (strncmp (s, "0x", 2) == 0)
967 s += 2;
968 if (sscanf(s, "%lx", &unichar) != 1)
969 {
970 cupsFileClose(fp);
971 cupsCharmapFlush();
972 return (NULL);
973 }
974 i ++;
975
976 /*
977 * Convert beyond Plane 0 (BMP) to Replacement Character...
978 */
979 if (unichar > 0xffff)
980 unichar = 0xfffd;
981
982 /*
983 * Save legacy to Unicode mapping in direct lookup table...
984 */
985 crow = &cmap->char2uni[(int) legchar];
986 *crow = (cups_ucs2_t) (unichar & 0xffff);
987
988 /*
989 * Save Unicode to legacy mapping in indirect lookup table...
990 */
991 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
992 if (srow == NULL)
993 {
994 srow = (cups_sbcs_t *) calloc(256, sizeof(cups_sbcs_t));
995 if (srow == NULL)
996 {
997 cupsFileClose(fp);
998 cupsCharmapFlush();
999 return (NULL);
1000 }
1001 cmap->uni2char[(int) ((unichar >> 8) & 0xff)] = srow;
1002 }
1003 srow += (int) (unichar & 0xff);
1004
1005 /*
1006 * Convert Replacement Character to visible replacement...
1007 */
1008 if (unichar == 0xfffd)
1009 legchar = (unsigned long) '?';
1010
1011 /*
1012 * First (oldest) legacy character uses Unicode mapping cell...
1013 */
1014 if (*srow == 0)
1015 *srow = (cups_sbcs_t) legchar;
1016 }
1017 cupsFileClose(fp);
1018 return (cmap);
1019}
1020
1021/*
1022 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1023 */
1024static _cups_vmap_t * /* O - Charmap or 0 on error */
1025get_vbcs_charmap(const cups_encoding_t encoding,
1026 /* I - Charmap Encoding */
1027 const char *filename) /* I - Charmap Filename */
1028{
1029 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1030 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1031 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1032 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1033 cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
1034 unsigned long legchar; /* Legacy character value */
1035 cups_utf32_t unichar; /* Unicode character value */
1036 int mapcount; /* Count of lines in charmap file */
1037 cups_file_t *fp; /* Charset map file pointer */
1038 char *s; /* Line parsing pointer */
1039 char line[256]; /* Line from charset map file */
1040 int i; /* Loop variable */
1041 int wide; /* 32-bit legacy char */
1042 _cups_globals_t *cg = _cupsGlobals();
1043 /* Pointer to library globals */
1044
1045 /*
1046 * Check for valid arguments...
1047 */
1048 if ((encoding < 0) || (filename == NULL))
1049 return (NULL);
1050
1051 /*
1052 * See if we already have this DBCS/VBCS charset map loaded...
1053 */
1054 for (vmap = cg->vmap_cache; vmap != NULL; vmap = vmap->next)
1055 {
1056 if (vmap->encoding == encoding)
1057 {
1058 vmap->used ++;
1059 return ((void *) vmap);
1060 }
1061 }
1062
1063 /*
1064 * Count lines in charmap file...
1065 */
1066 mapcount = get_charmap_count(filename);
1067 if (mapcount <= 0)
1068 return (NULL);
1069
1070 /*
1071 * Open VBCS charset map input file...
1072 */
1073 fp = cupsFileOpen(filename, "r");
1074 if (fp == NULL)
1075 return (NULL);
1076
1077 /*
1078 * Allocate memory for DBCS/VBCS charset map and add to cache...
1079 */
1080 vmap = (_cups_vmap_t *) calloc(1, sizeof(_cups_vmap_t));
1081 if (vmap == NULL)
1082 {
1083 cupsFileClose(fp);
1084 return (NULL);
1085 }
1086 vmap->next = cg->vmap_cache;
1087 cg->vmap_cache = vmap;
1088 vmap->used ++;
1089 vmap->encoding = encoding;
1090
1091 /*
1092 * Save DBCS/VBCS charset map into memory for transcoding...
1093 */
1094 leadchar = 0;
1095 wide2uni = NULL;
1096
1097 for (i = 0, wide = 0; i < mapcount; )
1098 {
1099 s = cupsFileGets(fp, line, sizeof(line));
1100 if (s == NULL)
1101 break;
1102 if ((*s == '#') || (*s == '\n') || (*s == '\0'))
1103 continue;
1104 if (strncmp (s, "0x", 2) == 0)
1105 s += 2;
1106 if ((sscanf(s, "%lx", &legchar) != 1)
1107 || ((legchar > 0xffff) && (encoding < CUPS_ENCODING_DBCS_END)))
1108 {
1109 cupsFileClose(fp);
1110 cupsCharmapFlush();
1111 return (NULL);
1112 }
1113 while ((*s != 0) && (*s != ' ') && (*s != '\t'))
1114 s ++;
1115 while ((*s == ' ') || (*s == '\t'))
1116 s ++;
1117 if (strncmp (s, "0x", 2) == 0)
1118 s += 2;
1119 if (sscanf(s, "%lx", &unichar) != 1)
1120 {
1121 cupsFileClose(fp);
1122 cupsCharmapFlush();
1123 return (NULL);
1124 }
1125 i ++;
1126
1127 /*
1128 * Convert beyond Plane 0 (BMP) to Replacement Character...
1129 */
1130 if (unichar > 0xffff)
1131 unichar = 0xfffd;
1132
1133 /*
1134 * Save lead char of 2/3/4-byte legacy char...
1135 */
1136 if ((legchar > 0xff) && (legchar <= 0xffff))
1137 {
1138 leadchar = (cups_sbcs_t) (legchar >> 8);
1139 vmap->lead2char[leadchar] = leadchar;
1140 }
1141 if ((legchar > 0xffff) && (legchar <= 0xffffff))
1142 {
1143 leadchar = (cups_sbcs_t) (legchar >> 16);
1144 vmap->lead3char[leadchar] = leadchar;
1145 }
1146 if (legchar > 0xffffff)
1147 {
1148 leadchar = (cups_sbcs_t) (legchar >> 24);
1149 vmap->lead4char[leadchar] = leadchar;
1150 }
1151
1152 /*
1153 * Save Legacy to Unicode mapping...
1154 */
1155 if (legchar <= 0xffff)
1156 {
1157 /*
1158 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1159 */
1160 crow = vmap->char2uni[(int) leadchar];
1161 if (crow == NULL)
1162 {
1163 crow = (cups_ucs2_t *) calloc(256, sizeof(cups_ucs2_t));
1164 if (crow == NULL)
1165 {
1166 cupsFileClose(fp);
1167 cupsCharmapFlush();
1168 return (NULL);
1169 }
1170 vmap->char2uni[(int) leadchar] = crow;
1171 }
1172 crow += (int) (legchar & 0xff);
1173 *crow = (cups_ucs2_t) unichar;
1174 }
1175 else
1176 {
1177 /*
1178 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1179 */
1180 if (wide == 0)
1181 {
1182 wide = 1;
1183 vmap->widecount = (mapcount - i + 1);
1184 wide2uni = (_cups_wide2uni_t *)
1185 calloc(vmap->widecount, sizeof(_cups_wide2uni_t));
1186 if (wide2uni == NULL)
1187 {
1188 cupsFileClose(fp);
1189 cupsCharmapFlush();
1190 return (NULL);
1191 }
1192 vmap->wide2uni = wide2uni;
1193 }
1194 wide2uni->widechar = (cups_vbcs_t) legchar;
1195 wide2uni->unichar = (cups_ucs2_t)unichar;
1196 wide2uni ++;
1197 }
1198
1199 /*
1200 * Save Unicode to legacy mapping in indirect lookup table...
1201 */
1202 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1203 if (vrow == NULL)
1204 {
1205 vrow = (cups_vbcs_t *) calloc(256, sizeof(cups_vbcs_t));
1206 if (vrow == NULL)
1207 {
1208 cupsFileClose(fp);
1209 cupsCharmapFlush();
1210 return (NULL);
1211 }
1212 vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
1213 }
1214 vrow += (int) (unichar & 0xff);
1215
1216 /*
1217 * Convert Replacement Character to visible replacement...
1218 */
1219 if (unichar == 0xfffd)
1220 legchar = (unsigned long) '?';
1221
1222 /*
1223 * First (oldest) legacy character uses Unicode mapping cell...
1224 */
1225 if (*vrow == 0)
1226 *vrow = (cups_vbcs_t) legchar;
1227 }
1228 vmap->charcount = (i - vmap->widecount);
1229 cupsFileClose(fp);
1230 return (vmap);
1231}
1232
1233/*
1234 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1235 */
1236static int /* O - Count or -1 on error */
1237conv_utf8_to_sbcs(char *dest, /* O - Target string */
1238 const cups_utf8_t *src, /* I - Source string */
1239 const int maxout, /* I - Max output */
1240 const cups_encoding_t encoding) /* I - Encoding */
1241{
1242 char *start = dest; /* Start of destination string */
1243 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1244 cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
1245 cups_utf32_t unichar; /* Character value */
1246 int worklen; /* Internal UCS-4 string length */
1247 cups_utf32_t work[CUPS_MAX_USTRING];
1248 /* Internal UCS-4 string */
1249 int i; /* Looping variable */
1250
1251 /*
1252 * Check for valid arguments and clear output...
1253 */
1254 if ((dest == NULL)
1255 || (src == NULL)
1256 || (maxout < 1)
1257 || (maxout > CUPS_MAX_USTRING)
1258 || (encoding == CUPS_UTF8))
1259 return (-1);
1260 *dest = '\0';
1261
1262 /*
1263 * Find legacy charset map in cache...
1264 */
1265 cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
1266 if (cmap == NULL)
1267 return (-1);
1268
1269 /*
1270 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1271 */
1272 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1273 if (worklen < 0)
1274 return (-1);
1275
1276 /*
1277 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1278 */
1279 for (i = 0; i < worklen;)
1280 {
1281 unichar = work[i];
1282 if (unichar == 0)
1283 break;
1284 i ++;
1285
1286 /*
1287 * Check for leading BOM (and delete from output)...
1288 */
1289 if ((i == 1) && (unichar == 0xfeff))
1290 continue;
1291
1292 /*
1293 * Convert ASCII verbatim (optimization)...
1294 */
1295 if (unichar <= 0x7f)
1296 {
1297 *dest = (char) unichar;
1298 dest ++;
1299 continue;
1300 }
1301
1302 /*
1303 * Convert unknown character to visible replacement...
1304 */
1305 srow = cmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1306 if (srow)
1307 srow += (int) (unichar & 0xff);
1308 if ((srow == NULL) || (*srow == 0))
1309 *dest = '?';
1310 else
1311 *dest = (char) (*srow);
1312 dest ++;
1313 }
1314 *dest = '\0';
1315 worklen = (int) (dest - start);
1316 cupsCharmapFree(encoding);
1317 return (worklen);
1318}
1319
1320/*
1321 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1322 */
1323static int /* O - Count or -1 on error */
1324conv_utf8_to_vbcs(char *dest, /* O - Target string */
1325 const cups_utf8_t *src, /* I - Source string */
1326 const int maxout, /* I - Max output */
1327 const cups_encoding_t encoding) /* I - Encoding */
1328{
1329 char *start = dest; /* Start of destination string */
1330 _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
1331 cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
1332 cups_utf32_t unichar; /* Character value */
1333 cups_vbcs_t legchar; /* Legacy character value */
1334 int worklen; /* Internal UCS-4 string length */
1335 cups_utf32_t work[CUPS_MAX_USTRING];
1336 /* Internal UCS-4 string */
1337 int i; /* Looping variable */
1338
1339 /*
1340 * Check for valid arguments and clear output...
1341 */
1342 if ((dest == NULL)
1343 || (src == NULL)
1344 || (maxout < 1)
1345 || (maxout > CUPS_MAX_USTRING)
1346 || (encoding == CUPS_UTF8))
1347 return (-1);
1348 *dest = '\0';
1349
1350 /*
1351 * Find legacy charset map in cache...
1352 */
1353 vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
1354 if (vmap == NULL)
1355 return (-1);
1356
1357 /*
1358 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1359 */
1360 worklen = cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING);
1361 if (worklen < 0)
1362 return (-1);
1363
1364 /*
1365 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1366 */
1367 for (i = 0; i < worklen;)
1368 {
1369 unichar = work[i];
1370 if (unichar == 0)
1371 break;
1372 i ++;
1373
1374 /*
1375 * Check for leading BOM (and delete from output)...
1376 */
1377 if ((i == 1) && (unichar == 0xfeff))
1378 continue;
1379
1380 /*
1381 * Convert ASCII verbatim (optimization)...
1382 */
1383 if (unichar <= 0x7f)
1384 {
1385 *dest = (char) unichar;
1386 dest ++;
1387 continue;
1388 }
1389
1390 /*
1391 * Convert unknown character to visible replacement...
1392 */
1393 vrow = vmap->uni2char[(int) ((unichar >> 8) & 0xff)];
1394 if (vrow)
1395 vrow += (int) (unichar & 0xff);
1396 if ((vrow == NULL) || (*vrow == 0))
1397 legchar = (cups_vbcs_t) '?';
1398 else
1399 legchar = (cups_vbcs_t) *vrow;
1400
1401 /*
1402 * Save n-byte legacy character...
1403 */
1404 if (legchar > 0xffffff)
1405 {
1406 *dest = (char) ((legchar >> 24) & 0xff);
1407 dest++;
1408 }
1409 if (legchar > 0xffff)
1410 {
1411 *dest = (char) ((legchar >> 16) & 0xff);
1412 dest++;
1413 }
1414 if (legchar > 0xff)
1415 {
1416 *dest = (char) ((legchar >> 8) & 0xff);
1417 dest++;
1418 }
1419 *dest = (char) (legchar & 0xff);
1420 dest ++;
1421 }
1422 *dest = '\0';
1423 worklen = (int) (dest - start);
1424 cupsCharmapFree(encoding);
1425 return (worklen);
1426}
1427
1428/*
1429 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1430 */
1431static int /* O - Count or -1 on error */
1432conv_sbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1433 const char *src, /* I - Source string */
1434 const int maxout, /* I - Max output */
1435 const cups_encoding_t encoding) /* I - Encoding */
1436{
1437 _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
1438 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1439 unsigned long legchar; /* Legacy character value */
1440 cups_utf32_t unichar; /* Unicode character value */
1441 int worklen; /* Internal UCS-4 string length */
1442 cups_utf32_t work[CUPS_MAX_USTRING];
1443 /* Internal UCS-4 string */
1444 int i; /* Looping variable */
1445
1446 /*
1447 * Check for valid arguments and clear output...
1448 */
1449 if ((dest == NULL)
1450 || (src == NULL)
1451 || (maxout < 1)
1452 || (maxout > CUPS_MAX_USTRING)
1453 || (encoding == CUPS_UTF8))
1454 return (-1);
1455 *dest = '\0';
1456
1457 /*
1458 * Find legacy charset map in cache...
1459 */
1460 cmap = (_cups_cmap_t *) cupsCharmapGet(encoding);
1461 if (cmap == NULL)
1462 return (-1);
1463
1464 /*
1465 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1466 */
1467 work[0] = 0xfeff;
1468 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1469 {
1470 if (*src == '\0')
1471 break;
1472 legchar = (unsigned long) *src;
1473
1474 /*
1475 * Convert ASCII verbatim (optimization)...
1476 */
1477 if (legchar <= 0x7f)
1478 {
1479 work[i] = (cups_utf32_t) legchar;
1480 i ++;
1481 continue;
1482 }
1483
1484 /*
1485 * Convert unknown character to Replacement Character...
1486 */
1487 crow = &cmap->char2uni[0];
1488 crow += (int) legchar;
1489 if (*crow == 0)
1490 unichar = 0xfffd;
1491 else
1492 unichar = (cups_utf32_t) *crow;
1493 work[i] = unichar;
1494 i ++;
1495 }
1496 work[i] = 0;
1497
1498 /*
1499 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1500 */
1501 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1502 cupsCharmapFree(encoding);
1503 return (worklen);
1504}
1505
1506
1507/*
1508 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1509 */
1510static int /* O - Count or -1 on error */
1511conv_vbcs_to_utf8(cups_utf8_t *dest, /* O - Target string */
1512 const char *src, /* I - Source string */
1513 const int maxout, /* I - Max output */
1514 const cups_encoding_t encoding) /* I - Encoding */
1515{
1516 _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
1517 cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
1518 _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
1519 cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
1520 cups_vbcs_t legchar; /* Legacy character value */
1521 cups_utf32_t unichar; /* Unicode character value */
1522 int i; /* Looping variable */
1523 int worklen; /* Internal UCS-4 string length */
1524 cups_utf32_t work[CUPS_MAX_USTRING];
1525 /* Internal UCS-4 string */
1526
1527 /*
1528 * Check for valid arguments and clear output...
1529 */
1530 if ((dest == NULL)
1531 || (src == NULL)
1532 || (maxout < 1)
1533 || (maxout > CUPS_MAX_USTRING)
1534 || (encoding == CUPS_UTF8))
1535 return (-1);
1536 *dest = '\0';
1537
1538 /*
1539 * Find legacy charset map in cache...
1540 */
1541 vmap = (_cups_vmap_t *) cupsCharmapGet(encoding);
1542 if (vmap == NULL)
1543 return (-1);
1544
1545 /*
1546 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1547 */
1548 work[0] = 0xfeff;
1549 for (i = 1; i < (CUPS_MAX_USTRING - 1); src ++)
1550 {
1551 if (*src == '\0')
1552 break;
1553 legchar = (cups_vbcs_t) *src;
1554 leadchar = (cups_sbcs_t) *src;
1555
1556 /*
1557 * Convert ASCII verbatim (optimization)...
1558 */
1559 if (legchar <= 0x7f)
1560 {
1561 work[i] = (cups_utf32_t) legchar;
1562 i ++;
1563 continue;
1564 }
1565
1566 /*
1567 * Convert 2-byte legacy character...
1568 */
1569 if (vmap->lead2char[(int) leadchar] == leadchar)
1570 {
1571 src ++;
1572 if (*src == '\0')
1573 return (-1);
1574 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1575
1576 /*
1577 * Convert unknown character to Replacement Character...
1578 */
1579 crow = vmap->char2uni[(int) ((legchar >> 8) & 0xff)];
1580 if (crow)
1581 crow += (int) (legchar & 0xff);
1582 if ((crow == NULL) || (*crow == 0))
1583 unichar = 0xfffd;
1584 else
1585 unichar = (cups_utf32_t) *crow;
1586 work[i] = unichar;
1587 i ++;
1588 continue;
1589 }
1590
1591 /*
1592 * Fetch 3-byte or 4-byte legacy character...
1593 */
1594 if (vmap->lead3char[(int) leadchar] == leadchar)
1595 {
1596 src ++;
1597 if (*src == '\0')
1598 return (-1);
1599 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1600 src ++;
1601 if (*src == '\0')
1602 return (-1);
1603 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1604 }
1605 else if (vmap->lead4char[(int) leadchar] == leadchar)
1606 {
1607 src ++;
1608 if (*src == '\0')
1609 return (-1);
1610 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1611 src ++;
1612 if (*src == '\0')
1613 return (-1);
1614 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1615 src ++;
1616 if (*src == '\0')
1617 return (-1);
1618 legchar = (legchar << 8) | (cups_vbcs_t) *src;
1619 }
1620 else
1621 return (-1);
1622
1623 /*
1624 * Find 3-byte or 4-byte legacy character...
1625 */
1626 wide2uni = vmap->wide2uni;
1627 wide2uni = (_cups_wide2uni_t *) bsearch(&legchar,
1628 vmap->wide2uni,
1629 vmap->widecount,
1630 sizeof(_cups_wide2uni_t),
1631 compare_wide);
1632
1633 /*
1634 * Convert unknown character to Replacement Character...
1635 */
1636 if ((wide2uni == NULL) || (wide2uni->unichar == 0))
1637 unichar = 0xfffd;
1638 else
1639 unichar = wide2uni->unichar;
1640 work[i] = unichar;
1641 i ++;
1642 }
1643 work[i] = 0;
1644
1645 /*
1646 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1647 */
1648 worklen = cupsUTF32ToUTF8(dest, work, maxout);
1649 cupsCharmapFree(encoding);
1650 return (worklen);
1651}
1652
1653/*
1654 * 'compare_wide()' - Compare key for wide (VBCS) match.
1655 */
1656static int
1657compare_wide(const void *k1, /* I - Key char */
1658 const void *k2) /* I - Map char */
1659{
1660 cups_vbcs_t *kp = (cups_vbcs_t *) k1;
1661 /* Key char pointer */
1662 _cups_wide2uni_t *mp = (_cups_wide2uni_t *) k2;
1663 /* Map char pointer */
1664 cups_vbcs_t key; /* Legacy key character */
1665 cups_vbcs_t map; /* Legacy map character */
1666 int result; /* Result Value */
1667
1668 key = *kp;
1669 map = mp->widechar;
1670 if (key >= map)
1671 result = (int) (key - map);
1672 else
1673 result = -1 * ((int) (map - key));
1674 return (result);
1675}
1676
1677
1678/*
fa73b229 1679 * End of "$Id: transcode.c 4967 2006-01-24 03:42:15Z mike $"
ef416fc2 1680 */