git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
2 * "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $"
4 * Transcoding support for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2006 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software Products at:
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
48 * Include necessary headers...
61 static int get_charmap_count(const char *filename
);
62 static _cups_cmap_t
*get_sbcs_charmap(const cups_encoding_t encoding
,
63 const char *filename
);
64 static _cups_vmap_t
*get_vbcs_charmap(const cups_encoding_t encoding
,
65 const char *filename
);
67 static int conv_utf8_to_sbcs(char *dest
,
68 const cups_utf8_t
*src
,
70 const cups_encoding_t encoding
);
71 static int conv_utf8_to_vbcs(char *dest
,
72 const cups_utf8_t
*src
,
74 const cups_encoding_t encoding
);
76 static int conv_sbcs_to_utf8(cups_utf8_t
*dest
,
79 const cups_encoding_t encoding
);
80 static int conv_vbcs_to_utf8(cups_utf8_t
*dest
,
83 const cups_encoding_t encoding
);
85 static int compare_wide(const void *k1
, const void *k2
);
88 * 'cupsCharmapGet()' - Get a character set map.
90 * This code handles single-byte (SBCS), double-byte (DBCS), and
91 * variable-byte (VBCS) character sets _without_ charset escapes...
92 * This code does not handle multiple-byte character sets (MBCS)
93 * (such as ISO-2022-JP) with charset switching via escapes...
96 void * /* O - Charset map pointer */
98 const cups_encoding_t encoding
) /* I - Encoding */
100 char mapname
[80]; /* Name of charset map */
101 char filename
[1024]; /* Filename for charset map file */
102 _cups_globals_t
*cg
= _cupsGlobals(); /* Global data */
106 * Check for valid arguments...
109 if ((encoding
< 0) || (encoding
>= CUPS_ENCODING_VBCS_END
))
113 * Get the data directory and charset map name...
116 snprintf(mapname
, sizeof(mapname
), "%s.txt", _cupsEncodingName(encoding
));
117 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
118 cg
->cups_datadir
, mapname
);
121 * Read charset map input file into cache...
124 if (encoding
< CUPS_ENCODING_SBCS_END
)
125 return (get_sbcs_charmap(encoding
, filename
));
126 else if (encoding
< CUPS_ENCODING_VBCS_END
)
127 return (get_vbcs_charmap(encoding
, filename
));
133 * 'cupsCharmapFree()' - Free a character set map.
135 * This does not actually free the map; use 'cupsCharmapFlush()' to release the cached maps.
138 cupsCharmapFree(const cups_encoding_t encoding
)
141 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
142 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
143 _cups_globals_t
*cg
= _cupsGlobals();
144 /* Pointer to library globals */
147 * See if we already have this SBCS charset map loaded...
149 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
151 if (cmap
->encoding
== encoding
)
160 * See if we already have this DBCS/VBCS charset map loaded...
162 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
164 if (vmap
->encoding
== encoding
)
175 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
178 cupsCharmapFlush(void)
180 int i
; /* Looping variable */
181 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
182 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
183 _cups_cmap_t
*cnext
; /* Next Legacy SBCS Charset Map */
184 _cups_vmap_t
*vnext
; /* Next Legacy VBCS Charset Map */
185 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
186 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
187 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
188 _cups_globals_t
*cg
= _cupsGlobals();
189 /* Pointer to library globals */
192 * Loop through SBCS charset map cache, free all memory...
194 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cnext
)
196 for (i
= 0; i
< 256; i
++)
198 if ((srow
= cmap
->uni2char
[i
]) != NULL
)
204 cg
->cmap_cache
= NULL
;
207 * Loop through DBCS/VBCS charset map cache, free all memory...
209 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vnext
)
211 for (i
= 0; i
< 256; i
++)
213 if ((crow
= vmap
->char2uni
[i
]) != NULL
)
216 for (i
= 0; i
< 256; i
++)
218 if ((vrow
= vmap
->uni2char
[i
]) != NULL
)
222 free(vmap
->wide2uni
);
226 cg
->vmap_cache
= NULL
;
231 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
233 * This code handles single-byte (SBCS), double-byte (DBCS), and
234 * variable-byte (VBCS) character sets _without_ charset escapes...
235 * This code does not handle multiple-byte character sets (MBCS)
236 * (such as ISO-2022-JP) with charset switching via escapes...
238 int /* O - Count or -1 on error */
239 cupsUTF8ToCharset(char *dest
, /* O - Target string */
240 const cups_utf8_t
*src
, /* I - Source string */
241 const int maxout
, /* I - Max output */
242 const cups_encoding_t encoding
) /* I - Encoding */
245 * Check for valid arguments...
248 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
252 * Handle identity conversions...
255 if (encoding
== CUPS_UTF8
||
256 encoding
< 0 || encoding
>= CUPS_ENCODING_VBCS_END
)
258 strlcpy(dest
, (char *)src
, maxout
);
259 return (strlen(dest
));
263 * Convert input UTF-8 to legacy charset...
265 if (encoding
< CUPS_ENCODING_SBCS_END
)
266 return (conv_utf8_to_sbcs(dest
, src
, maxout
, encoding
));
267 else if (encoding
< CUPS_ENCODING_VBCS_END
)
268 return (conv_utf8_to_vbcs(dest
, src
, maxout
, encoding
));
274 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
276 * This code handles single-byte (SBCS), double-byte (DBCS), and
277 * variable-byte (VBCS) character sets _without_ charset escapes...
278 * This code does not handle multiple-byte character sets (MBCS)
279 * (such as ISO-2022-JP) with charset switching via escapes...
281 int /* O - Count or -1 on error */
282 cupsCharsetToUTF8(cups_utf8_t
*dest
, /* O - Target string */
283 const char *src
, /* I - Source string */
284 const int maxout
, /* I - Max output */
285 const cups_encoding_t encoding
) /* I - Encoding */
288 * Check for valid arguments...
291 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
295 * Handle identity conversions...
298 if (encoding
== CUPS_UTF8
||
299 encoding
< 0 || encoding
>= CUPS_ENCODING_VBCS_END
)
301 strlcpy((char *)dest
, src
, maxout
);
302 return (strlen((char *)dest
));
306 * Convert input legacy charset to UTF-8...
308 if (encoding
< CUPS_ENCODING_SBCS_END
)
309 return (conv_sbcs_to_utf8(dest
, src
, maxout
, encoding
));
310 else if (encoding
< CUPS_ENCODING_VBCS_END
)
311 return (conv_vbcs_to_utf8(dest
, src
, maxout
, encoding
));
317 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
319 * This code does not support Unicode beyond 16 bits (Plane 0)...
321 int /* O - Count or -1 on error */
322 cupsUTF8ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
323 const cups_utf8_t
*src
, /* I - Source string */
324 const int maxout
) /* I - Max output */
326 int worklen
; /* Internal UCS-4 string length */
327 cups_utf32_t work
[CUPS_MAX_USTRING
];
328 /* Internal UCS-4 string */
331 * Check for valid arguments and clear output...
336 || (maxout
> CUPS_MAX_USTRING
))
341 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
343 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
348 * Convert internal UCS-4 to output UTF-16...
350 worklen
= cupsUTF32ToUTF16(dest
, work
, maxout
);
355 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
357 * This code does not support Unicode beyond 16 bits (Plane 0)...
359 int /* O - Count or -1 on error */
360 cupsUTF16ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
361 const cups_utf16_t
*src
, /* I - Source string */
362 const int maxout
) /* I - Max output */
364 int worklen
; /* Internal UCS-4 string length */
365 cups_utf32_t work
[CUPS_MAX_USTRING
];
366 /* Internal UCS-4 string */
369 * Check for valid arguments and clear output...
374 || (maxout
> CUPS_MAX_USTRING
))
379 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
381 worklen
= cupsUTF16ToUTF32(work
, src
, CUPS_MAX_USTRING
);
386 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
388 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
393 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
395 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
397 * UTF-32 char UTF-8 char(s)
398 * --------------------------------------------------
399 * 0 to 127 = 0xxxxxxx (US-ASCII)
400 * 128 to 2047 = 110xxxxx 10yyyyyy
401 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
402 * 65536 to 1114111 = 11110xxx 10yyyyyy 10zzzzzz 10wwwwww
404 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
405 * which would convert to five- or six-octet UTF-8 sequences...
407 * This code does not support Unicode beyond 16 bits (Plane 0)...
409 int /* O - Count or -1 on error */
410 cupsUTF8ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
411 const cups_utf8_t
*src
, /* I - Source string */
412 const int maxout
) /* I - Max output */
414 cups_utf8_t
*first
= (cups_utf8_t
*) src
;
415 size_t srclen
; /* Source string length */
416 int i
; /* Looping variable */
417 cups_utf32_t ch
; /* Character value */
418 cups_utf32_t next
; /* Next character value */
419 cups_utf32_t ch32
; /* UTF-32 character value */
422 * Check for valid arguments and clear output...
427 || (maxout
> CUPS_MAX_USTRING
))
432 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
436 srclen
= strlen((char *) src
);
437 for (i
= 1; i
< (maxout
- 1); src
++, dest
++)
439 ch
= (cups_utf32_t
) *src
;
446 * Convert UTF-8 character(s) to UTF-32 character...
448 if ((ch
& 0x7f) == ch
)
451 * One-octet UTF-8 <= 127 (US-ASCII)...
455 else if ((ch
& 0xe0) == 0xc0)
458 * Two-octet UTF-8 <= 2047 (Latin-x)...
461 next
= (cups_utf32_t
) *src
;
465 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
468 * Check for non-shortest form (invalid UTF-8)...
474 else if ((ch
& 0xf0) == 0xe0)
477 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
480 next
= (cups_utf32_t
) *src
;
484 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
486 next
= (cups_utf32_t
) *src
;
490 ch32
= ((ch32
<< 6) | (next
& 0x3f));
493 * Check for non-shortest form (invalid UTF-8)...
499 else if ((ch
& 0xf8) == 0xf0)
502 * Convert four-octet UTF-8 to Replacement Character...
504 if (((src
- first
) + 3) >= srclen
)
509 else if ((ch
& 0xfc) == 0xf8)
512 * Five-octet UTF-8 (invalid strict UTF-32)...
516 else if ((ch
& 0xfe) == 0xfc)
519 * Six-octet UTF-8 (invalid strict UTF-32)...
526 * More than six-octet (invalid UTF-8 sequence)...
532 * Check for UTF-16 surrogate (illegal UTF-8)...
534 if ((*dest
>= 0xd800) && (*dest
<= 0xdfff))
538 * Check for beyond Plane 16 (invalid UTF-8)...
540 if (*dest
> 0x10ffff)
548 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
550 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
552 * UTF-32 char UTF-8 char(s)
553 * --------------------------------------------------
554 * 0 to 127 = 0xxxxxxx (US-ASCII)
555 * 128 to 2047 = 110xxxxx 10yyyyyy
556 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
557 * 65536 to 1114111 = 11110xxx 10yyyyyy 10zzzzzz 10wwwwww
559 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
560 * which would convert to five- or six-octet UTF-8 sequences...
562 * This code does not support Unicode beyond 16 bits (Plane 0)...
564 int /* O - Count or -1 on error */
565 cupsUTF32ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
566 const cups_utf32_t
*src
, /* I - Source string */
567 const int maxout
) /* I - Max output */
569 cups_utf32_t
*first
= (cups_utf32_t
*) src
;
570 /* First source char */
571 cups_utf8_t
*start
= dest
; /* Start of destination string */
572 int i
; /* Looping variable */
573 int swap
= 0; /* Byte-swap input to output */
574 cups_utf32_t ch
; /* Character value */
577 * Check for valid arguments and clear output...
586 * Check for leading BOM in UTF-32 and inverted BOM...
588 if (*src
== 0xfffe0000)
592 * Convert input UTF-32 to output UTF-8...
594 for (i
= 0; i
< (maxout
- 1); src
++)
601 * Byte swap input UTF-32, if necessary...
604 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
607 * Check for leading BOM (and delete from output)...
609 if ((src
== first
) && (ch
== 0xfeff))
613 * Check for beyond Plane 16 (invalid UTF-32)...
619 * Convert beyond Plane 0 (BMP) to Replacement Character...
625 * Convert UTF-32 character to UTF-8 character(s)...
630 * One-octet UTF-8 <= 127 (US-ASCII)...
632 *dest
= (cups_utf8_t
) ch
;
636 else if (ch
<= 0x7ff)
639 * Two-octet UTF-8 <= 2047 (Latin-x)...
641 if (i
> (maxout
- 2))
643 *dest
= (cups_utf8_t
) (0xc0 | ((ch
>> 6) & 0x1f));
646 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
653 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
655 if (i
> (maxout
- 3))
657 *dest
= (cups_utf8_t
) (0xe0 | ((ch
>> 12) & 0x0f));
660 *dest
= (cups_utf8_t
) (0x80 | ((ch
>> 6) & 0x3f));
663 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
669 i
= (int) (dest
- start
);
674 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
676 * This code does not support Unicode beyond 16 bits (Plane 0)...
678 int /* O - Count or -1 on error */
679 cupsUTF16ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
680 const cups_utf16_t
*src
, /* I - Source string */
681 const int maxout
) /* I - Max output */
683 int i
; /* Looping variable */
684 int swap
= 0; /* Byte-swap input to output */
685 int surrogate
= 0; /* Expecting low-half surrogate */
686 cups_utf32_t ch
; /* Character value */
689 * Check for valid arguments and clear output...
694 || (maxout
> CUPS_MAX_USTRING
))
699 * Check for leading BOM in UTF-16 and inverted BOM...
705 * Convert input UTF-16 to output UTF-32...
707 for (i
= 0; i
< (maxout
- 1); src
++)
709 ch
= (cups_utf32_t
) (*src
& 0xffff);
715 * Byte swap input UTF-16, if necessary...
718 ch
= (cups_utf32_t
) ((ch
<< 8) | (ch
>> 8));
721 * Discard expected UTF-16 low-half surrogate...
723 if ((ch
>= 0xdc00) && (ch
<= 0xdfff))
732 * Convert UTF-16 high-half surrogate to Replacement Character...
734 if ((ch
>= 0xd800) && (ch
<= 0xdbff))
749 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
751 * This code does not support Unicode beyond 16 bits (Plane 0)...
753 int /* O - Count or -1 on error */
754 cupsUTF32ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
755 const cups_utf32_t
*src
, /* I - Source string */
756 const int maxout
) /* I - Max output */
758 int i
; /* Looping variable */
759 int swap
= 0; /* Byte-swap input to output */
760 cups_utf32_t ch
; /* Character value */
763 * Check for valid arguments and clear output...
768 || (maxout
> CUPS_MAX_USTRING
))
773 * Check for leading BOM in UTF-32 and inverted BOM...
775 if (*src
== 0xfffe0000)
779 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
781 for (i
= 0; i
< (maxout
- 1); src
++, dest
++)
789 * Byte swap input UTF-32, if necessary...
792 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
795 * Check for UTF-16 surrogate (illegal UTF-32)...
797 if ((ch
>= 0xd800) && (ch
<= 0xdfff))
801 * Check for beyond Plane 16 (invalid UTF-32)...
807 * Convert beyond Plane 0 (BMP) to Replacement Character...
811 *dest
= (cups_utf16_t
) ch
;
818 * 'get_charmap_count()' - Count lines in a charmap file.
820 static int /* O - Count or -1 on error */
821 get_charmap_count(const char *filename
) /* I - Charmap Filename */
823 int i
; /* Looping variable */
824 cups_file_t
*fp
; /* Map input file pointer */
825 char *s
; /* Line parsing pointer */
826 char line
[256]; /* Line from input map file */
827 cups_utf32_t unichar
; /* Unicode character value */
830 * Open map input file...
832 if ((filename
== NULL
) || (*filename
== '\0'))
834 fp
= cupsFileOpen(filename
, "r");
839 * Count lines in map input file...
841 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
843 s
= cupsFileGets(fp
, line
, sizeof(line
));
846 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
848 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
850 while ((*s
== ' ') || (*s
== '\t'))
852 if (strncmp (s
, "0x", 2) == 0)
854 if ((sscanf(s
, "%lx", &unichar
) != 1)
855 || (unichar
> 0xffff))
866 * Close file and return charmap count (non-comment line count)...
873 * 'get_sbcs_charmap()' - Get SBCS Charmap.
875 static _cups_cmap_t
* /* O - Charmap or 0 on error */
876 get_sbcs_charmap(const cups_encoding_t encoding
,
877 /* I - Charmap Encoding */
878 const char *filename
) /* I - Charmap Filename */
880 int i
; /* Loop variable */
881 unsigned long legchar
; /* Legacy character value */
882 cups_utf32_t unichar
; /* Unicode character value */
883 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
884 cups_file_t
*fp
; /* Charset map file pointer */
885 char *s
; /* Line parsing pointer */
886 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
887 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
888 char line
[256]; /* Line from charset map file */
889 _cups_globals_t
*cg
= _cupsGlobals();
890 /* Pointer to library globals */
893 * Check for valid arguments...
895 if ((encoding
< 0) || (filename
== NULL
))
899 * See if we already have this SBCS charset map loaded...
901 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
903 if (cmap
->encoding
== encoding
)
906 return ((void *) cmap
);
911 * Open SBCS charset map input file...
913 fp
= cupsFileOpen(filename
, "r");
918 * Allocate memory for SBCS charset map and add to cache...
920 cmap
= (_cups_cmap_t
*) calloc(1, sizeof(_cups_cmap_t
));
926 cmap
->next
= cg
->cmap_cache
;
927 cg
->cmap_cache
= cmap
;
929 cmap
->encoding
= encoding
;
932 * Save SBCS charset map into memory for transcoding...
934 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
936 s
= cupsFileGets(fp
, line
, sizeof(line
));
939 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
941 if (strncmp (s
, "0x", 2) == 0)
943 if ((sscanf(s
, "%lx", &legchar
) != 1)
950 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
952 while ((*s
== ' ') || (*s
== '\t'))
954 if (strncmp (s
, "0x", 2) == 0)
956 if (sscanf(s
, "%lx", &unichar
) != 1)
965 * Convert beyond Plane 0 (BMP) to Replacement Character...
967 if (unichar
> 0xffff)
971 * Save legacy to Unicode mapping in direct lookup table...
973 crow
= &cmap
->char2uni
[(int) legchar
];
974 *crow
= (cups_ucs2_t
) (unichar
& 0xffff);
977 * Save Unicode to legacy mapping in indirect lookup table...
979 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
982 srow
= (cups_sbcs_t
*) calloc(256, sizeof(cups_sbcs_t
));
989 cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = srow
;
991 srow
+= (int) (unichar
& 0xff);
994 * Convert Replacement Character to visible replacement...
996 if (unichar
== 0xfffd)
997 legchar
= (unsigned long) '?';
1000 * First (oldest) legacy character uses Unicode mapping cell...
1003 *srow
= (cups_sbcs_t
) legchar
;
1010 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1012 static _cups_vmap_t
* /* O - Charmap or 0 on error */
1013 get_vbcs_charmap(const cups_encoding_t encoding
,
1014 /* I - Charmap Encoding */
1015 const char *filename
) /* I - Charmap Filename */
1017 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1018 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1019 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1020 _cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1021 cups_sbcs_t leadchar
; /* Lead char of 2-byte legacy char */
1022 unsigned long legchar
; /* Legacy character value */
1023 cups_utf32_t unichar
; /* Unicode character value */
1024 int mapcount
; /* Count of lines in charmap file */
1025 cups_file_t
*fp
; /* Charset map file pointer */
1026 char *s
; /* Line parsing pointer */
1027 char line
[256]; /* Line from charset map file */
1028 int i
; /* Loop variable */
1029 int wide
; /* 32-bit legacy char */
1030 _cups_globals_t
*cg
= _cupsGlobals();
1031 /* Pointer to library globals */
1034 * Check for valid arguments...
1036 if ((encoding
< 0) || (filename
== NULL
))
1040 * See if we already have this DBCS/VBCS charset map loaded...
1042 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
1044 if (vmap
->encoding
== encoding
)
1047 return ((void *) vmap
);
1052 * Count lines in charmap file...
1054 mapcount
= get_charmap_count(filename
);
1059 * Open VBCS charset map input file...
1061 fp
= cupsFileOpen(filename
, "r");
1066 * Allocate memory for DBCS/VBCS charset map and add to cache...
1068 vmap
= (_cups_vmap_t
*) calloc(1, sizeof(_cups_vmap_t
));
1074 vmap
->next
= cg
->vmap_cache
;
1075 cg
->vmap_cache
= vmap
;
1077 vmap
->encoding
= encoding
;
1080 * Save DBCS/VBCS charset map into memory for transcoding...
1085 for (i
= 0, wide
= 0; i
< mapcount
; )
1087 s
= cupsFileGets(fp
, line
, sizeof(line
));
1090 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1092 if (strncmp (s
, "0x", 2) == 0)
1094 if ((sscanf(s
, "%lx", &legchar
) != 1)
1095 || ((legchar
> 0xffff) && (encoding
< CUPS_ENCODING_DBCS_END
)))
1101 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
1103 while ((*s
== ' ') || (*s
== '\t'))
1105 if (strncmp (s
, "0x", 2) == 0)
1107 if (sscanf(s
, "%lx", &unichar
) != 1)
1116 * Convert beyond Plane 0 (BMP) to Replacement Character...
1118 if (unichar
> 0xffff)
1122 * Save lead char of 2/3/4-byte legacy char...
1124 if ((legchar
> 0xff) && (legchar
<= 0xffff))
1126 leadchar
= (cups_sbcs_t
) (legchar
>> 8);
1127 vmap
->lead2char
[leadchar
] = leadchar
;
1129 if ((legchar
> 0xffff) && (legchar
<= 0xffffff))
1131 leadchar
= (cups_sbcs_t
) (legchar
>> 16);
1132 vmap
->lead3char
[leadchar
] = leadchar
;
1134 if (legchar
> 0xffffff)
1136 leadchar
= (cups_sbcs_t
) (legchar
>> 24);
1137 vmap
->lead4char
[leadchar
] = leadchar
;
1141 * Save Legacy to Unicode mapping...
1143 if (legchar
<= 0xffff)
1146 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1148 crow
= vmap
->char2uni
[(int) leadchar
];
1151 crow
= (cups_ucs2_t
*) calloc(256, sizeof(cups_ucs2_t
));
1158 vmap
->char2uni
[(int) leadchar
] = crow
;
1160 crow
+= (int) (legchar
& 0xff);
1161 *crow
= (cups_ucs2_t
) unichar
;
1166 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1171 vmap
->widecount
= (mapcount
- i
+ 1);
1172 wide2uni
= (_cups_wide2uni_t
*)
1173 calloc(vmap
->widecount
, sizeof(_cups_wide2uni_t
));
1174 if (wide2uni
== NULL
)
1180 vmap
->wide2uni
= wide2uni
;
1182 wide2uni
->widechar
= (cups_vbcs_t
) legchar
;
1183 wide2uni
->unichar
= (cups_ucs2_t
)unichar
;
1188 * Save Unicode to legacy mapping in indirect lookup table...
1190 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1193 vrow
= (cups_vbcs_t
*) calloc(256, sizeof(cups_vbcs_t
));
1200 vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = vrow
;
1202 vrow
+= (int) (unichar
& 0xff);
1205 * Convert Replacement Character to visible replacement...
1207 if (unichar
== 0xfffd)
1208 legchar
= (unsigned long) '?';
1211 * First (oldest) legacy character uses Unicode mapping cell...
1214 *vrow
= (cups_vbcs_t
) legchar
;
1216 vmap
->charcount
= (i
- vmap
->widecount
);
1222 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1224 static int /* O - Count or -1 on error */
1225 conv_utf8_to_sbcs(char *dest
, /* O - Target string */
1226 const cups_utf8_t
*src
, /* I - Source string */
1227 const int maxout
, /* I - Max output */
1228 const cups_encoding_t encoding
) /* I - Encoding */
1230 char *start
= dest
; /* Start of destination string */
1231 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1232 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
1233 cups_utf32_t unichar
; /* Character value */
1234 int worklen
; /* Internal UCS-4 string length */
1235 cups_utf32_t work
[CUPS_MAX_USTRING
];
1236 /* Internal UCS-4 string */
1237 int i
; /* Looping variable */
1240 * Check for valid arguments and clear output...
1245 || (maxout
> CUPS_MAX_USTRING
)
1246 || (encoding
== CUPS_UTF8
))
1251 * Find legacy charset map in cache...
1253 cmap
= (_cups_cmap_t
*) cupsCharmapGet(encoding
);
1258 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1260 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1265 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1267 for (i
= 0; i
< worklen
;)
1275 * Check for leading BOM (and delete from output)...
1277 if ((i
== 1) && (unichar
== 0xfeff))
1281 * Convert ASCII verbatim (optimization)...
1283 if (unichar
<= 0x7f)
1285 *dest
= (char) unichar
;
1291 * Convert unknown character to visible replacement...
1293 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1295 srow
+= (int) (unichar
& 0xff);
1296 if ((srow
== NULL
) || (*srow
== 0))
1299 *dest
= (char) (*srow
);
1303 worklen
= (int) (dest
- start
);
1304 cupsCharmapFree(encoding
);
1309 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1311 static int /* O - Count or -1 on error */
1312 conv_utf8_to_vbcs(char *dest
, /* O - Target string */
1313 const cups_utf8_t
*src
, /* I - Source string */
1314 const int maxout
, /* I - Max output */
1315 const cups_encoding_t encoding
) /* I - Encoding */
1317 char *start
= dest
; /* Start of destination string */
1318 _cups_vmap_t
*vmap
; /* Legacy DBCS / Unicode Charset Map */
1319 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1320 cups_utf32_t unichar
; /* Character value */
1321 cups_vbcs_t legchar
; /* Legacy character value */
1322 int worklen
; /* Internal UCS-4 string length */
1323 cups_utf32_t work
[CUPS_MAX_USTRING
];
1324 /* Internal UCS-4 string */
1325 int i
; /* Looping variable */
1328 * Check for valid arguments and clear output...
1333 || (maxout
> CUPS_MAX_USTRING
)
1334 || (encoding
== CUPS_UTF8
))
1339 * Find legacy charset map in cache...
1341 vmap
= (_cups_vmap_t
*) cupsCharmapGet(encoding
);
1346 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1348 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1353 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1355 for (i
= 0; i
< worklen
;)
1363 * Check for leading BOM (and delete from output)...
1365 if ((i
== 1) && (unichar
== 0xfeff))
1369 * Convert ASCII verbatim (optimization)...
1371 if (unichar
<= 0x7f)
1373 *dest
= (char) unichar
;
1379 * Convert unknown character to visible replacement...
1381 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1383 vrow
+= (int) (unichar
& 0xff);
1384 if ((vrow
== NULL
) || (*vrow
== 0))
1385 legchar
= (cups_vbcs_t
) '?';
1387 legchar
= (cups_vbcs_t
) *vrow
;
1390 * Save n-byte legacy character...
1392 if (legchar
> 0xffffff)
1394 *dest
= (char) ((legchar
>> 24) & 0xff);
1397 if (legchar
> 0xffff)
1399 *dest
= (char) ((legchar
>> 16) & 0xff);
1404 *dest
= (char) ((legchar
>> 8) & 0xff);
1407 *dest
= (char) (legchar
& 0xff);
1411 worklen
= (int) (dest
- start
);
1412 cupsCharmapFree(encoding
);
1417 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1419 static int /* O - Count or -1 on error */
1420 conv_sbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1421 const char *src
, /* I - Source string */
1422 const int maxout
, /* I - Max output */
1423 const cups_encoding_t encoding
) /* I - Encoding */
1425 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1426 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1427 unsigned long legchar
; /* Legacy character value */
1428 cups_utf32_t unichar
; /* Unicode character value */
1429 int worklen
; /* Internal UCS-4 string length */
1430 cups_utf32_t work
[CUPS_MAX_USTRING
];
1431 /* Internal UCS-4 string */
1432 int i
; /* Looping variable */
1435 * Check for valid arguments and clear output...
1440 || (maxout
> CUPS_MAX_USTRING
)
1441 || (encoding
== CUPS_UTF8
))
1446 * Find legacy charset map in cache...
1448 cmap
= (_cups_cmap_t
*) cupsCharmapGet(encoding
);
1453 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1456 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1460 legchar
= (unsigned long) *src
;
1463 * Convert ASCII verbatim (optimization)...
1465 if (legchar
<= 0x7f)
1467 work
[i
] = (cups_utf32_t
) legchar
;
1473 * Convert unknown character to Replacement Character...
1475 crow
= &cmap
->char2uni
[0];
1476 crow
+= (int) legchar
;
1480 unichar
= (cups_utf32_t
) *crow
;
1487 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1489 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1490 cupsCharmapFree(encoding
);
1496 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1498 static int /* O - Count or -1 on error */
1499 conv_vbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1500 const char *src
, /* I - Source string */
1501 const int maxout
, /* I - Max output */
1502 const cups_encoding_t encoding
) /* I - Encoding */
1504 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1505 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1506 _cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1507 cups_sbcs_t leadchar
; /* Lead char of n-byte legacy char */
1508 cups_vbcs_t legchar
; /* Legacy character value */
1509 cups_utf32_t unichar
; /* Unicode character value */
1510 int i
; /* Looping variable */
1511 int worklen
; /* Internal UCS-4 string length */
1512 cups_utf32_t work
[CUPS_MAX_USTRING
];
1513 /* Internal UCS-4 string */
1516 * Check for valid arguments and clear output...
1521 || (maxout
> CUPS_MAX_USTRING
)
1522 || (encoding
== CUPS_UTF8
))
1527 * Find legacy charset map in cache...
1529 vmap
= (_cups_vmap_t
*) cupsCharmapGet(encoding
);
1534 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1537 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1541 legchar
= (cups_vbcs_t
) *src
;
1542 leadchar
= (cups_sbcs_t
) *src
;
1545 * Convert ASCII verbatim (optimization)...
1547 if (legchar
<= 0x7f)
1549 work
[i
] = (cups_utf32_t
) legchar
;
1555 * Convert 2-byte legacy character...
1557 if (vmap
->lead2char
[(int) leadchar
] == leadchar
)
1562 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1565 * Convert unknown character to Replacement Character...
1567 crow
= vmap
->char2uni
[(int) ((legchar
>> 8) & 0xff)];
1569 crow
+= (int) (legchar
& 0xff);
1570 if ((crow
== NULL
) || (*crow
== 0))
1573 unichar
= (cups_utf32_t
) *crow
;
1580 * Fetch 3-byte or 4-byte legacy character...
1582 if (vmap
->lead3char
[(int) leadchar
] == leadchar
)
1587 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1591 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1593 else if (vmap
->lead4char
[(int) leadchar
] == leadchar
)
1598 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1602 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1606 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1612 * Find 3-byte or 4-byte legacy character...
1614 wide2uni
= vmap
->wide2uni
;
1615 wide2uni
= (_cups_wide2uni_t
*) bsearch(&legchar
,
1618 sizeof(_cups_wide2uni_t
),
1622 * Convert unknown character to Replacement Character...
1624 if ((wide2uni
== NULL
) || (wide2uni
->unichar
== 0))
1627 unichar
= wide2uni
->unichar
;
1634 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1636 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1637 cupsCharmapFree(encoding
);
1642 * 'compare_wide()' - Compare key for wide (VBCS) match.
1645 compare_wide(const void *k1
, /* I - Key char */
1646 const void *k2
) /* I - Map char */
1648 cups_vbcs_t
*kp
= (cups_vbcs_t
*) k1
;
1649 /* Key char pointer */
1650 _cups_wide2uni_t
*mp
= (_cups_wide2uni_t
*) k2
;
1651 /* Map char pointer */
1652 cups_vbcs_t key
; /* Legacy key character */
1653 cups_vbcs_t map
; /* Legacy map character */
1654 int result
; /* Result Value */
1659 result
= (int) (key
- map
);
1661 result
= -1 * ((int) (map
- key
));
1667 * End of "$Id: transcode.c 4903 2006-01-10 20:02:46Z mike $"