]>
git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
4 * Transcoding support for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2005 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
48 * Include necessary headers...
61 static int get_charmap_count(const char *filename
);
62 static cups_cmap_t
*get_sbcs_charmap(const cups_encoding_t encoding
,
63 const char *filename
);
64 static cups_vmap_t
*get_vbcs_charmap(const cups_encoding_t encoding
,
65 const char *filename
);
67 static int conv_utf8_to_sbcs(char *dest
,
68 const cups_utf8_t
*src
,
70 const cups_encoding_t encoding
);
71 static int conv_utf8_to_vbcs(char *dest
,
72 const cups_utf8_t
*src
,
74 const cups_encoding_t encoding
);
76 static int conv_sbcs_to_utf8(cups_utf8_t
*dest
,
79 const cups_encoding_t encoding
);
80 static int conv_vbcs_to_utf8(cups_utf8_t
*dest
,
83 const cups_encoding_t encoding
);
85 static int compare_wide(const void *k1
, const void *k2
);
88 * 'cupsCharmapGet()' - Get a character set map.
90 * This code handles single-byte (SBCS), double-byte (DBCS), and
91 * variable-byte (VBCS) character sets _without_ charset escapes...
92 * This code does not handle multiple-byte character sets (MBCS)
93 * (such as ISO-2022-JP) with charset switching via escapes...
95 void * /* O - Charset map pointer */
96 cupsCharmapGet(const cups_encoding_t encoding
)
99 char *datadir
; /* CUPS_DATADIR environment variable */
100 char mapname
[80]; /* Name of charset map */
101 char filename
[1024]; /* Filename for charset map file */
104 * Check for valid arguments...
106 if ((encoding
< 0) || (encoding
>= CUPS_ENCODING_VBCS_END
))
110 * Get the data directory and charset map name...
112 if ((datadir
= getenv("CUPS_DATADIR")) == NULL
)
113 datadir
= CUPS_DATADIR
;
114 snprintf(mapname
, sizeof(mapname
), "%s.txt", cupsEncodingName(encoding
));
115 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
119 * Read charset map input file into cache...
121 if (encoding
< CUPS_ENCODING_SBCS_END
)
122 return (get_sbcs_charmap(encoding
, filename
));
123 else if (encoding
< CUPS_ENCODING_VBCS_END
)
124 return (get_vbcs_charmap(encoding
, filename
));
130 * 'cupsCharmapFree()' - Free a character set map.
132 * This does not actually free; use 'cupsCharmapFlush()' for that.
135 cupsCharmapFree(const cups_encoding_t encoding
)
138 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
139 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
140 cups_globals_t
*cg
= _cupsGlobals();
141 /* Pointer to library globals */
144 * See if we already have this SBCS charset map loaded...
146 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
148 if (cmap
->encoding
== encoding
)
157 * See if we already have this DBCS/VBCS charset map loaded...
159 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
161 if (vmap
->encoding
== encoding
)
172 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
175 cupsCharmapFlush(void)
177 int i
; /* Looping variable */
178 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
179 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
180 cups_cmap_t
*cnext
; /* Next Legacy SBCS Charset Map */
181 cups_vmap_t
*vnext
; /* Next Legacy VBCS Charset Map */
182 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
183 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
184 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
185 cups_globals_t
*cg
= _cupsGlobals();
186 /* Pointer to library globals */
189 * Loop through SBCS charset map cache, free all memory...
191 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cnext
)
193 for (i
= 0; i
< 256; i
++)
195 if ((srow
= cmap
->uni2char
[i
]) != NULL
)
201 cg
->cmap_cache
= NULL
;
204 * Loop through DBCS/VBCS charset map cache, free all memory...
206 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vnext
)
208 for (i
= 0; i
< 256; i
++)
210 if ((crow
= vmap
->char2uni
[i
]) != NULL
)
213 for (i
= 0; i
< 256; i
++)
215 if ((vrow
= vmap
->uni2char
[i
]) != NULL
)
219 free(vmap
->wide2uni
);
223 cg
->vmap_cache
= NULL
;
228 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
230 * This code handles single-byte (SBCS), double-byte (DBCS), and
231 * variable-byte (VBCS) character sets _without_ charset escapes...
232 * This code does not handle multiple-byte character sets (MBCS)
233 * (such as ISO-2022-JP) with charset switching via escapes...
235 int /* O - Count or -1 on error */
236 cupsUTF8ToCharset(char *dest
, /* O - Target string */
237 const cups_utf8_t
*src
, /* I - Source string */
238 const int maxout
, /* I - Max output */
239 const cups_encoding_t encoding
) /* I - Encoding */
242 * Check for valid arguments...
247 || (maxout
> CUPS_MAX_USTRING
)
249 || (encoding
== CUPS_UTF8
)
250 || (encoding
>= CUPS_ENCODING_VBCS_END
))
254 * Convert input UTF-8 to legacy charset...
256 if (encoding
< CUPS_ENCODING_SBCS_END
)
257 return (conv_utf8_to_sbcs(dest
, src
, maxout
, encoding
));
258 else if (encoding
< CUPS_ENCODING_VBCS_END
)
259 return (conv_utf8_to_vbcs(dest
, src
, maxout
, encoding
));
265 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
267 * This code handles single-byte (SBCS), double-byte (DBCS), and
268 * variable-byte (VBCS) character sets _without_ charset escapes...
269 * This code does not handle multiple-byte character sets (MBCS)
270 * (such as ISO-2022-JP) with charset switching via escapes...
272 int /* O - Count or -1 on error */
273 cupsCharsetToUTF8(cups_utf8_t
*dest
, /* O - Target string */
274 const char *src
, /* I - Source string */
275 const int maxout
, /* I - Max output */
276 const cups_encoding_t encoding
) /* I - Encoding */
279 * Check for valid arguments...
284 || (maxout
> CUPS_MAX_USTRING
)
286 || (encoding
== CUPS_UTF8
)
287 || (encoding
>= CUPS_ENCODING_VBCS_END
))
291 * Convert input legacy charset to UTF-8...
293 if (encoding
< CUPS_ENCODING_SBCS_END
)
294 return (conv_sbcs_to_utf8(dest
, src
, maxout
, encoding
));
295 else if (encoding
< CUPS_ENCODING_VBCS_END
)
296 return (conv_vbcs_to_utf8(dest
, src
, maxout
, encoding
));
302 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
304 * This code does not support Unicode beyond 16-bits (Plane 0)...
306 int /* O - Count or -1 on error */
307 cupsUTF8ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
308 const cups_utf8_t
*src
, /* I - Source string */
309 const int maxout
) /* I - Max output */
311 int worklen
; /* Internal UCS-4 string length */
312 cups_utf32_t work
[CUPS_MAX_USTRING
];
313 /* Internal UCS-4 string */
316 * Check for valid arguments and clear output...
321 || (maxout
> CUPS_MAX_USTRING
))
326 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
328 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
333 * Convert internal UCS-4 to output UTF-16...
335 worklen
= cupsUTF32ToUTF16(dest
, work
, maxout
);
340 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
342 * This code does not support Unicode beyond 16-bits (Plane 0)...
344 int /* O - Count or -1 on error */
345 cupsUTF16ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
346 const cups_utf16_t
*src
, /* I - Source string */
347 const int maxout
) /* I - Max output */
349 int worklen
; /* Internal UCS-4 string length */
350 cups_utf32_t work
[CUPS_MAX_USTRING
];
351 /* Internal UCS-4 string */
354 * Check for valid arguments and clear output...
359 || (maxout
> CUPS_MAX_USTRING
))
364 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
366 worklen
= cupsUTF16ToUTF32(work
, src
, CUPS_MAX_USTRING
);
371 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
373 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
378 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
380 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
382 * UTF-32 char UTF-8 char(s)
383 * --------------------------------------------------
384 * 0 to 127 = 0xxxxxxx (US-ASCII)
385 * 128 to 2047 = 110xxxxx 10yyyyyy
386 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
387 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
389 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
390 * which would convert to five- or six-octet UTF-8 sequences...
392 * This code does not support Unicode beyond 16-bits (Plane 0)...
394 int /* O - Count or -1 on error */
395 cupsUTF8ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
396 const cups_utf8_t
*src
, /* I - Source string */
397 const int maxout
) /* I - Max output */
399 cups_utf8_t
*first
= (cups_utf8_t
*) src
;
400 int srclen
; /* Source string length */
401 int i
; /* Looping variable */
402 cups_utf32_t ch
; /* Character value */
403 cups_utf32_t next
; /* Next character value */
404 cups_utf32_t ch32
; /* UTF-32 character value */
407 * Check for valid arguments and clear output...
412 || (maxout
> CUPS_MAX_USTRING
))
417 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
421 srclen
= strlen((char *) src
);
422 for (i
= 1; i
< (maxout
- 1); src
++, dest
++)
424 ch
= (cups_utf32_t
) *src
;
431 * Convert UTF-8 character(s) to UTF-32 character...
433 if ((ch
& 0x7f) == ch
)
436 * One-octet UTF-8 <= 127 (US-ASCII)...
440 else if ((ch
& 0xe0) == 0xc0)
443 * Two-octet UTF-8 <= 2047 (Latin-x)...
446 next
= (cups_utf32_t
) *src
;
450 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
453 * Check for non-shortest form (invalid UTF-8)...
459 else if ((ch
& 0xf0) == 0xe0)
462 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
465 next
= (cups_utf32_t
) *src
;
469 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
471 next
= (cups_utf32_t
) *src
;
475 ch32
= ((ch32
<< 6) | (next
& 0x3f));
478 * Check for non-shortest form (invalid UTF-8)...
484 else if ((ch
& 0xf8) == 0xf0)
487 * Four-octet UTF-8 to Replacement Character...
489 if (((src
- first
) + 3) >= srclen
)
494 else if ((ch
& 0xfc) == 0xf8)
497 * Five-octet UTF-8 (invalid strict UTF-32)...
501 else if ((ch
& 0xfe) == 0xfc)
504 * Six-octet UTF-8 (invalid strict UTF-32)...
511 * More than six-octet (invalid UTF-8 sequence)...
517 * Check for UTF-16 surrogate (illegal UTF-8)...
519 if ((*dest
>= 0xd800) && (*dest
<= 0xdfff))
523 * Check for beyond Plane 16 (invalid UTF-8)...
525 if (*dest
> 0x10ffff)
533 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
535 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
537 * UTF-32 char UTF-8 char(s)
538 * --------------------------------------------------
539 * 0 to 127 = 0xxxxxxx (US-ASCII)
540 * 128 to 2047 = 110xxxxx 10yyyyyy
541 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
542 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
544 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
545 * which would convert to five- or six-octet UTF-8 sequences...
547 * This code does not support Unicode beyond 16-bits (Plane 0)...
549 int /* O - Count or -1 on error */
550 cupsUTF32ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
551 const cups_utf32_t
*src
, /* I - Source string */
552 const int maxout
) /* I - Max output */
554 cups_utf32_t
*first
= (cups_utf32_t
*) src
;
555 /* First source char */
556 cups_utf8_t
*start
= dest
; /* Start of destination string */
557 int i
; /* Looping variable */
558 int swap
= 0; /* Byte-swap input to output */
559 cups_utf32_t ch
; /* Character value */
562 * Check for valid arguments and clear output...
571 * Check for leading BOM in UTF-32 and inverted BOM...
573 if (*src
== 0xfffe0000)
577 * Convert input UTF-32 to output UTF-8...
579 for (i
= 0; i
< (maxout
- 1); src
++)
586 * Byte swap input UTF-32, if necessary...
589 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
592 * Check for leading BOM (and delete from output)...
594 if ((src
== first
) && (ch
== 0xfeff))
598 * Check for beyond Plane 16 (invalid UTF-32)...
604 * Convert beyond Plane 0 (BMP) to Replacement Character...
610 * Convert UTF-32 character to UTF-8 character(s)...
615 * One-octet UTF-8 <= 127 (US-ASCII)...
617 *dest
= (cups_utf8_t
) ch
;
621 else if (ch
<= 0x7ff)
624 * Two-octet UTF-8 <= 2047 (Latin-x)...
626 if (i
> (maxout
- 2))
628 *dest
= (cups_utf8_t
) (0xc0 | ((ch
>> 6) & 0x1f));
631 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
638 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
640 if (i
> (maxout
- 3))
642 *dest
= (cups_utf8_t
) (0xe0 | ((ch
>> 12) & 0x0f));
645 *dest
= (cups_utf8_t
) (0x80 | ((ch
>> 6) & 0x3f));
648 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
654 i
= (int) (dest
- start
);
659 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
661 * This code does not support Unicode beyond 16-bits (Plane 0)...
663 int /* O - Count or -1 on error */
664 cupsUTF16ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
665 const cups_utf16_t
*src
, /* I - Source string */
666 const int maxout
) /* I - Max output */
668 int i
; /* Looping variable */
669 int swap
= 0; /* Byte-swap input to output */
670 int surrogate
= 0; /* Expecting low-half surrogate */
671 cups_utf32_t ch
; /* Character value */
674 * Check for valid arguments and clear output...
679 || (maxout
> CUPS_MAX_USTRING
))
684 * Check for leading BOM in UTF-16 and inverted BOM...
690 * Convert input UTF-16 to output UTF-32...
692 for (i
= 0; i
< (maxout
- 1); src
++)
694 ch
= (cups_utf32_t
) (*src
& 0xffff);
700 * Byte swap input UTF-16, if necessary...
703 ch
= (cups_utf32_t
) ((ch
<< 8) | (ch
>> 8));
706 * Discard expected UTF-16 low-half surrogate...
708 if ((ch
>= 0xdc00) && (ch
<= 0xdfff))
717 * Convert UTF-16 high-half surrogate to Replacement Character...
719 if ((ch
>= 0xd800) && (ch
<= 0xdbff))
734 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
736 * This code does not support Unicode beyond 16-bits (Plane 0)...
738 int /* O - Count or -1 on error */
739 cupsUTF32ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
740 const cups_utf32_t
*src
, /* I - Source string */
741 const int maxout
) /* I - Max output */
743 int i
; /* Looping variable */
744 int swap
= 0; /* Byte-swap input to output */
745 cups_utf32_t ch
; /* Character value */
748 * Check for valid arguments and clear output...
753 || (maxout
> CUPS_MAX_USTRING
))
758 * Check for leading BOM in UTF-32 and inverted BOM...
760 if (*src
== 0xfffe0000)
764 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
766 for (i
= 0; i
< (maxout
- 1); src
++, dest
++)
774 * Byte swap input UTF-32, if necessary...
777 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
780 * Check for UTF-16 surrogate (illegal UTF-32)...
782 if ((ch
>= 0xd800) && (ch
<= 0xdfff))
786 * Check for beyond Plane 16 (invalid UTF-32)...
792 * Convert beyond Plane 0 (BMP) to Replacement Character...
796 *dest
= (cups_utf16_t
) ch
;
803 * 'get_charmap_count()' - Count lines in a charmap file.
805 static int /* O - Count or -1 on error */
806 get_charmap_count(const char *filename
) /* I - Charmap Filename */
808 int i
; /* Looping variable */
809 cups_file_t
*fp
; /* Map input file pointer */
810 char *s
; /* Line parsing pointer */
811 char line
[256]; /* Line from input map file */
812 cups_utf32_t unichar
; /* Unicode character value */
815 * Open map input file...
817 if ((filename
== NULL
) || (*filename
== '\0'))
819 fp
= cupsFileOpen(filename
, "r");
824 * Count lines in map input file...
826 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
828 s
= cupsFileGets(fp
, line
, sizeof(line
));
831 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
833 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
835 while ((*s
== ' ') || (*s
== '\t'))
837 if (strncmp (s
, "0x", 2) == 0)
839 if ((sscanf(s
, "%lx", &unichar
) != 1)
840 || (unichar
> 0xffff))
851 * Close file and return charmap count (non-comment line count)...
858 * 'get_sbcs_charmap()' - Get SBCS Charmap.
860 static cups_cmap_t
* /* O - Charmap or 0 on error */
861 get_sbcs_charmap(const cups_encoding_t encoding
,
862 /* I - Charmap Encoding */
863 const char *filename
) /* I - Charmap Filename */
865 int i
; /* Loop variable */
866 unsigned long legchar
; /* Legacy character value */
867 cups_utf32_t unichar
; /* Unicode character value */
868 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
869 cups_file_t
*fp
; /* Charset map file pointer */
870 char *s
; /* Line parsing pointer */
871 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
872 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
873 char line
[256]; /* Line from charset map file */
874 cups_globals_t
*cg
= _cupsGlobals();
875 /* Pointer to library globals */
878 * Check for valid arguments...
880 if ((encoding
< 0) || (filename
== NULL
))
884 * See if we already have this SBCS charset map loaded...
886 for (cmap
= cg
->cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
888 if (cmap
->encoding
== encoding
)
891 return ((void *) cmap
);
896 * Open SBCS charset map input file...
898 fp
= cupsFileOpen(filename
, "r");
903 * Allocate memory for SBCS charset map and add to cache...
905 cmap
= (cups_cmap_t
*) calloc(1, sizeof(cups_cmap_t
));
911 cmap
->next
= cg
->cmap_cache
;
912 cg
->cmap_cache
= cmap
;
914 cmap
->encoding
= encoding
;
917 * Save SBCS charset map into memory for transcoding...
919 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
921 s
= cupsFileGets(fp
, line
, sizeof(line
));
924 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
926 if (strncmp (s
, "0x", 2) == 0)
928 if ((sscanf(s
, "%lx", &legchar
) != 1)
935 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
937 while ((*s
== ' ') || (*s
== '\t'))
939 if (strncmp (s
, "0x", 2) == 0)
941 if (sscanf(s
, "%lx", &unichar
) != 1)
950 * Convert beyond Plane 0 (BMP) to Replacement Character...
952 if (unichar
> 0xffff)
956 * Save legacy to Unicode mapping in direct lookup table...
958 crow
= &cmap
->char2uni
[(int) legchar
];
959 *crow
= (cups_ucs2_t
) (unichar
& 0xffff);
962 * Save Unicode to legacy mapping in indirect lookup table...
964 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
967 srow
= (cups_sbcs_t
*) calloc(256, sizeof(cups_sbcs_t
));
974 cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = srow
;
976 srow
+= (int) (unichar
& 0xff);
979 * Convert Replacement Character to visible replacement...
981 if (unichar
== 0xfffd)
982 legchar
= (unsigned long) '?';
985 * First (oldest) legacy character uses Unicode mapping cell...
988 *srow
= (cups_sbcs_t
) legchar
;
995 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
997 static cups_vmap_t
* /* O - Charmap or 0 on error */
998 get_vbcs_charmap(const cups_encoding_t encoding
,
999 /* I - Charmap Encoding */
1000 const char *filename
) /* I - Charmap Filename */
1002 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1003 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1004 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1005 cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1006 cups_sbcs_t leadchar
; /* Lead char of 2-byte legacy char */
1007 unsigned long legchar
; /* Legacy character value */
1008 cups_utf32_t unichar
; /* Unicode character value */
1009 int mapcount
; /* Count of lines in charmap file */
1010 cups_file_t
*fp
; /* Charset map file pointer */
1011 char *s
; /* Line parsing pointer */
1012 char line
[256]; /* Line from charset map file */
1013 int i
; /* Loop variable */
1014 int wide
; /* 32-bit legacy char */
1015 cups_globals_t
*cg
= _cupsGlobals();
1016 /* Pointer to library globals */
1019 * Check for valid arguments...
1021 if ((encoding
< 0) || (filename
== NULL
))
1025 * See if we already have this DBCS/VBCS charset map loaded...
1027 for (vmap
= cg
->vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
1029 if (vmap
->encoding
== encoding
)
1032 return ((void *) vmap
);
1037 * Count lines in charmap file...
1039 mapcount
= get_charmap_count(filename
);
1044 * Open VBCS charset map input file...
1046 fp
= cupsFileOpen(filename
, "r");
1051 * Allocate memory for DBCS/VBCS charset map and add to cache...
1053 vmap
= (cups_vmap_t
*) calloc(1, sizeof(cups_vmap_t
));
1059 vmap
->next
= cg
->vmap_cache
;
1060 cg
->vmap_cache
= vmap
;
1062 vmap
->encoding
= encoding
;
1065 * Save DBCS/VBCS charset map into memory for transcoding...
1070 for (i
= 0, wide
= 0; i
< mapcount
; )
1072 s
= cupsFileGets(fp
, line
, sizeof(line
));
1075 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1077 if (strncmp (s
, "0x", 2) == 0)
1079 if ((sscanf(s
, "%lx", &legchar
) != 1)
1080 || ((legchar
> 0xffff) && (encoding
< CUPS_ENCODING_DBCS_END
)))
1086 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
1088 while ((*s
== ' ') || (*s
== '\t'))
1090 if (strncmp (s
, "0x", 2) == 0)
1092 if (sscanf(s
, "%lx", &unichar
) != 1)
1101 * Convert beyond Plane 0 (BMP) to Replacement Character...
1103 if (unichar
> 0xffff)
1107 * Save lead char of 2/3/4-byte legacy char...
1109 if ((legchar
> 0xff) && (legchar
<= 0xffff))
1111 leadchar
= (cups_sbcs_t
) (legchar
>> 8);
1112 vmap
->lead2char
[leadchar
] = leadchar
;
1114 if ((legchar
> 0xffff) && (legchar
<= 0xffffff))
1116 leadchar
= (cups_sbcs_t
) (legchar
>> 16);
1117 vmap
->lead3char
[leadchar
] = leadchar
;
1119 if (legchar
> 0xffffff)
1121 leadchar
= (cups_sbcs_t
) (legchar
>> 24);
1122 vmap
->lead4char
[leadchar
] = leadchar
;
1126 * Save Legacy to Unicode mapping...
1128 if (legchar
<= 0xffff)
1131 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1133 crow
= vmap
->char2uni
[(int) leadchar
];
1136 crow
= (cups_ucs2_t
*) calloc(256, sizeof(cups_ucs2_t
));
1143 vmap
->char2uni
[(int) leadchar
] = crow
;
1145 crow
+= (int) (legchar
& 0xff);
1146 *crow
= (cups_vbcs_t
) unichar
;
1151 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1156 vmap
->widecount
= (mapcount
- i
+ 1);
1157 wide2uni
= (cups_wide2uni_t
*)
1158 calloc(vmap
->widecount
, sizeof(cups_wide2uni_t
));
1159 if (wide2uni
== NULL
)
1165 vmap
->wide2uni
= wide2uni
;
1167 wide2uni
->widechar
= (cups_vbcs_t
) legchar
;
1168 wide2uni
->unichar
= unichar
;
1173 * Save Unicode to legacy mapping in indirect lookup table...
1175 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1178 vrow
= (cups_vbcs_t
*) calloc(256, sizeof(cups_vbcs_t
));
1185 vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = vrow
;
1187 vrow
+= (int) (unichar
& 0xff);
1190 * Convert Replacement Character to visible replacement...
1192 if (unichar
== 0xfffd)
1193 legchar
= (unsigned long) '?';
1196 * First (oldest) legacy character uses Unicode mapping cell...
1199 *vrow
= (cups_vbcs_t
) legchar
;
1201 vmap
->charcount
= (i
- vmap
->widecount
);
1207 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1209 static int /* O - Count or -1 on error */
1210 conv_utf8_to_sbcs(char *dest
, /* O - Target string */
1211 const cups_utf8_t
*src
, /* I - Source string */
1212 const int maxout
, /* I - Max output */
1213 const cups_encoding_t encoding
) /* I - Encoding */
1215 char *start
= dest
; /* Start of destination string */
1216 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1217 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
1218 cups_utf32_t unichar
; /* Character value */
1219 int worklen
; /* Internal UCS-4 string length */
1220 cups_utf32_t work
[CUPS_MAX_USTRING
];
1221 /* Internal UCS-4 string */
1222 int i
; /* Looping variable */
1225 * Check for valid arguments and clear output...
1230 || (maxout
> CUPS_MAX_USTRING
)
1231 || (encoding
== CUPS_UTF8
))
1236 * Find legacy charset map in cache...
1238 cmap
= (cups_cmap_t
*) cupsCharmapGet(encoding
);
1243 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1245 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1250 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1252 for (i
= 0; i
< worklen
;)
1260 * Check for leading BOM (and delete from output)...
1262 if ((i
== 1) && (unichar
== 0xfeff))
1266 * Convert ASCII verbatim (optimization)...
1268 if (unichar
<= 0x7f)
1270 *dest
= (char) unichar
;
1276 * Convert unknown character to visible replacement...
1278 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1280 srow
+= (int) (unichar
& 0xff);
1281 if ((srow
== NULL
) || (*srow
== 0))
1284 *dest
= (char) (*srow
);
1288 worklen
= (int) (dest
- start
);
1289 cupsCharmapFree(encoding
);
1294 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1296 static int /* O - Count or -1 on error */
1297 conv_utf8_to_vbcs(char *dest
, /* O - Target string */
1298 const cups_utf8_t
*src
, /* I - Source string */
1299 const int maxout
, /* I - Max output */
1300 const cups_encoding_t encoding
) /* I - Encoding */
1302 char *start
= dest
; /* Start of destination string */
1303 cups_vmap_t
*vmap
; /* Legacy DBCS / Unicode Charset Map */
1304 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1305 cups_utf32_t unichar
; /* Character value */
1306 cups_vbcs_t legchar
; /* Legacy character value */
1307 int worklen
; /* Internal UCS-4 string length */
1308 cups_utf32_t work
[CUPS_MAX_USTRING
];
1309 /* Internal UCS-4 string */
1310 int i
; /* Looping variable */
1313 * Check for valid arguments and clear output...
1318 || (maxout
> CUPS_MAX_USTRING
)
1319 || (encoding
== CUPS_UTF8
))
1324 * Find legacy charset map in cache...
1326 vmap
= (cups_vmap_t
*) cupsCharmapGet(encoding
);
1331 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1333 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1338 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1340 for (i
= 0; i
< worklen
;)
1348 * Check for leading BOM (and delete from output)...
1350 if ((i
== 1) && (unichar
== 0xfeff))
1354 * Convert ASCII verbatim (optimization)...
1356 if (unichar
<= 0x7f)
1358 *dest
= (char) unichar
;
1364 * Convert unknown character to visible replacement...
1366 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1368 vrow
+= (int) (unichar
& 0xff);
1369 if ((vrow
== NULL
) || (*vrow
== 0))
1370 legchar
= (cups_vbcs_t
) '?';
1372 legchar
= (cups_vbcs_t
) *vrow
;
1375 * Save n-byte legacy character...
1377 if (legchar
> 0xffffff)
1379 *dest
= (char) ((legchar
>> 24) & 0xff);
1382 if (legchar
> 0xffff)
1384 *dest
= (char) ((legchar
>> 16) & 0xff);
1389 *dest
= (char) ((legchar
>> 8) & 0xff);
1392 *dest
= (char) (legchar
& 0xff);
1396 worklen
= (int) (dest
- start
);
1397 cupsCharmapFree(encoding
);
1402 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1404 static int /* O - Count or -1 on error */
1405 conv_sbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1406 const char *src
, /* I - Source string */
1407 const int maxout
, /* I - Max output */
1408 const cups_encoding_t encoding
) /* I - Encoding */
1410 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1411 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1412 unsigned long legchar
; /* Legacy character value */
1413 cups_utf32_t unichar
; /* Unicode character value */
1414 int worklen
; /* Internal UCS-4 string length */
1415 cups_utf32_t work
[CUPS_MAX_USTRING
];
1416 /* Internal UCS-4 string */
1417 int i
; /* Looping variable */
1420 * Check for valid arguments and clear output...
1425 || (maxout
> CUPS_MAX_USTRING
)
1426 || (encoding
== CUPS_UTF8
))
1431 * Find legacy charset map in cache...
1433 cmap
= (cups_cmap_t
*) cupsCharmapGet(encoding
);
1438 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1441 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1445 legchar
= (unsigned long) *src
;
1448 * Convert ASCII verbatim (optimization)...
1450 if (legchar
<= 0x7f)
1452 work
[i
] = (cups_utf32_t
) legchar
;
1458 * Convert unknown character to Replacement Character...
1460 crow
= &cmap
->char2uni
[0];
1461 crow
+= (int) legchar
;
1465 unichar
= (cups_utf32_t
) *crow
;
1472 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1474 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1475 cupsCharmapFree(encoding
);
1481 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1483 static int /* O - Count or -1 on error */
1484 conv_vbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1485 const char *src
, /* I - Source string */
1486 const int maxout
, /* I - Max output */
1487 const cups_encoding_t encoding
) /* I - Encoding */
1489 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1490 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1491 cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1492 cups_sbcs_t leadchar
; /* Lead char of n-byte legacy char */
1493 cups_vbcs_t legchar
; /* Legacy character value */
1494 cups_utf32_t unichar
; /* Unicode character value */
1495 int i
; /* Looping variable */
1496 int worklen
; /* Internal UCS-4 string length */
1497 cups_utf32_t work
[CUPS_MAX_USTRING
];
1498 /* Internal UCS-4 string */
1501 * Check for valid arguments and clear output...
1506 || (maxout
> CUPS_MAX_USTRING
)
1507 || (encoding
== CUPS_UTF8
))
1512 * Find legacy charset map in cache...
1514 vmap
= (cups_vmap_t
*) cupsCharmapGet(encoding
);
1519 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1522 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1526 legchar
= (cups_vbcs_t
) *src
;
1527 leadchar
= (cups_sbcs_t
) *src
;
1530 * Convert ASCII verbatim (optimization)...
1532 if (legchar
<= 0x7f)
1534 work
[i
] = (cups_utf32_t
) legchar
;
1540 * Convert 2-byte legacy character...
1542 if (vmap
->lead2char
[(int) leadchar
] == leadchar
)
1547 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1550 * Convert unknown character to Replacement Character...
1552 crow
= vmap
->char2uni
[(int) ((legchar
>> 8) & 0xff)];
1554 crow
+= (int) (legchar
& 0xff);
1555 if ((crow
== NULL
) || (*crow
== 0))
1558 unichar
= (cups_utf32_t
) *crow
;
1565 * Fetch 3-byte or 4-byte legacy character...
1567 if (vmap
->lead3char
[(int) leadchar
] == leadchar
)
1572 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1576 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1578 else if (vmap
->lead4char
[(int) leadchar
] == leadchar
)
1583 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1587 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1591 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1597 * Find 3-byte or 4-byte legacy character...
1599 wide2uni
= vmap
->wide2uni
;
1600 wide2uni
= (cups_wide2uni_t
*) bsearch(&legchar
,
1603 sizeof(cups_wide2uni_t
),
1607 * Convert unknown character to Replacement Character...
1609 if ((wide2uni
== NULL
) || (wide2uni
->unichar
== 0))
1612 unichar
= wide2uni
->unichar
;
1619 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1621 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1622 cupsCharmapFree(encoding
);
1627 * 'compare_wide()' - Compare key for wide (VBCS) match.
1630 compare_wide(const void *k1
, /* I - Key char */
1631 const void *k2
) /* I - Map char */
1633 cups_vbcs_t
*kp
= (cups_vbcs_t
*) k1
;
1634 /* Key char pointer */
1635 cups_wide2uni_t
*mp
= (cups_wide2uni_t
*) k2
;
1636 /* Map char pointer */
1637 cups_vbcs_t key
; /* Legacy key character */
1638 cups_vbcs_t map
; /* Legacy map character */
1639 int result
; /* Result Value */
1644 result
= (int) (key
- map
);
1646 result
= -1 * ((int) (map
- key
));