]>
git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
2 * "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"
4 * Transcoding support for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2006 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * _cupsCharmapFlush() - Flush all character set maps out of cache.
27 * _cupsCharmapFree() - Free a character set map.
28 * _cupsCharmapGet() - Get a character set map.
29 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
30 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
31 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
32 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
33 * compare_wide() - Compare key for wide (VBCS) match.
34 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
35 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
36 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
37 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
38 * free_sbcs_charmap() - Free memory used by a single byte character set.
39 * free_vbcs_charmap() - Free memory used by a variable byte character set.
40 * get_charmap_count() - Count lines in a charmap file.
41 * get_sbcs_charmap() - Get SBCS Charmap.
42 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
46 * Include necessary headers...
60 static int compare_wide(const void *k1
, const void *k2
);
61 static int conv_sbcs_to_utf8(cups_utf8_t
*dest
,
62 const cups_sbcs_t
*src
,
64 const cups_encoding_t encoding
);
65 static int conv_utf8_to_sbcs(cups_sbcs_t
*dest
,
66 const cups_utf8_t
*src
,
68 const cups_encoding_t encoding
);
69 static int conv_utf8_to_vbcs(cups_sbcs_t
*dest
,
70 const cups_utf8_t
*src
,
72 const cups_encoding_t encoding
);
73 static int conv_vbcs_to_utf8(cups_utf8_t
*dest
,
74 const cups_sbcs_t
*src
,
76 const cups_encoding_t encoding
);
77 static void free_sbcs_charmap(_cups_cmap_t
*sbcs
);
78 static void free_vbcs_charmap(_cups_vmap_t
*vbcs
);
79 static int get_charmap_count(cups_file_t
*fp
);
80 static _cups_cmap_t
*get_sbcs_charmap(const cups_encoding_t encoding
,
81 const char *filename
);
82 static _cups_vmap_t
*get_vbcs_charmap(const cups_encoding_t encoding
,
83 const char *filename
);
87 * '_cupsCharmapFlush()' - Flush all character set maps out of cache.
91 _cupsCharmapFlush(_cups_globals_t
*cg
) /* I - Global data */
93 _cups_cmap_t
*cmap
, /* Legacy SBCS / Unicode Charset Map */
94 *cnext
; /* Next Legacy SBCS Charset Map */
95 _cups_vmap_t
*vmap
, /* Legacy VBCS / Unicode Charset Map */
96 *vnext
; /* Next Legacy VBCS Charset Map */
100 * Loop through SBCS charset map cache, free all memory...
103 for (cmap
= cg
->cmap_cache
; cmap
; cmap
= cnext
)
107 free_sbcs_charmap(cmap
);
110 cg
->cmap_cache
= NULL
;
113 * Loop through DBCS/VBCS charset map cache, free all memory...
116 for (vmap
= cg
->vmap_cache
; vmap
; vmap
= vnext
)
120 free_vbcs_charmap(vmap
);
125 cg
->vmap_cache
= NULL
;
130 * '_cupsCharmapFree()' - Free a character set map.
132 * This does not actually free; use '_cupsCharmapFlush()' for that.
137 const cups_encoding_t encoding
) /* I - Encoding */
139 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
140 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
141 _cups_globals_t
*cg
= _cupsGlobals(); /* Pointer to library globals */
145 * See if we already have this SBCS charset map loaded...
148 for (cmap
= cg
->cmap_cache
; cmap
; cmap
= cmap
->next
)
150 if (cmap
->encoding
== encoding
)
160 * See if we already have this DBCS/VBCS charset map loaded...
163 for (vmap
= cg
->vmap_cache
; vmap
; vmap
= vmap
->next
)
165 if (vmap
->encoding
== encoding
)
176 * '_cupsCharmapGet()' - Get a character set map.
178 * This code handles single-byte (SBCS), double-byte (DBCS), and
179 * variable-byte (VBCS) character sets _without_ charset escapes...
180 * This code does not handle multiple-byte character sets (MBCS)
181 * (such as ISO-2022-JP) with charset switching via escapes...
184 void * /* O - Charset map pointer */
186 const cups_encoding_t encoding
) /* I - Encoding */
188 char filename
[1024]; /* Filename for charset map file */
189 _cups_globals_t
*cg
= _cupsGlobals(); /* Global data */
192 DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding
));
195 * Check for valid arguments...
198 if (encoding
< 0 || encoding
>= CUPS_ENCODING_VBCS_END
)
200 DEBUG_puts(" Bad encoding, returning NULL!");
205 * Get the data directory and charset map name...
208 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s.txt",
209 cg
->cups_datadir
, _cupsEncodingName(encoding
));
211 DEBUG_printf((" filename=\"%s\"\n", filename
));
214 * Read charset map input file into cache...
217 if (encoding
< CUPS_ENCODING_SBCS_END
)
218 return (get_sbcs_charmap(encoding
, filename
));
219 else if (encoding
< CUPS_ENCODING_VBCS_END
)
220 return (get_vbcs_charmap(encoding
, filename
));
227 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
229 * This code handles single-byte (SBCS), double-byte (DBCS), and
230 * variable-byte (VBCS) character sets _without_ charset escapes...
231 * This code does not handle multiple-byte character sets (MBCS)
232 * (such as ISO-2022-JP) with charset switching via escapes...
235 int /* O - Count or -1 on error */
237 cups_utf8_t
*dest
, /* O - Target string */
238 const char *src
, /* I - Source string */
239 const int maxout
, /* I - Max output */
240 const cups_encoding_t encoding
) /* I - Encoding */
243 * Check for valid arguments...
246 DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
247 dest
, src
, maxout
, encoding
));
252 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
254 DEBUG_puts(" Bad arguments, returning -1");
259 * Handle identity conversions...
262 if (encoding
== CUPS_UTF8
||
263 encoding
< 0 || encoding
>= CUPS_ENCODING_VBCS_END
)
265 strlcpy((char *)dest
, src
, maxout
);
266 return (strlen((char *)dest
));
270 * Convert input legacy charset to UTF-8...
273 if (encoding
< CUPS_ENCODING_SBCS_END
)
274 return (conv_sbcs_to_utf8(dest
, (cups_sbcs_t
*)src
, maxout
, encoding
));
275 else if (encoding
< CUPS_ENCODING_VBCS_END
)
276 return (conv_vbcs_to_utf8(dest
, (cups_sbcs_t
*)src
, maxout
, encoding
));
279 puts(" Bad encoding, returning -1");
286 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
288 * This code handles single-byte (SBCS), double-byte (DBCS), and
289 * variable-byte (VBCS) character sets _without_ charset escapes...
290 * This code does not handle multiple-byte character sets (MBCS)
291 * (such as ISO-2022-JP) with charset switching via escapes...
294 int /* O - Count or -1 on error */
296 char *dest
, /* O - Target string */
297 const cups_utf8_t
*src
, /* I - Source string */
298 const int maxout
, /* I - Max output */
299 const cups_encoding_t encoding
) /* I - Encoding */
302 * Check for valid arguments...
305 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
314 * Handle identity conversions...
317 if (encoding
== CUPS_UTF8
||
318 encoding
< 0 || encoding
>= CUPS_ENCODING_VBCS_END
)
320 strlcpy(dest
, (char *)src
, maxout
);
321 return (strlen(dest
));
325 * Convert input UTF-8 to legacy charset...
328 if (encoding
< CUPS_ENCODING_SBCS_END
)
329 return (conv_utf8_to_sbcs((cups_sbcs_t
*)dest
, src
, maxout
, encoding
));
330 else if (encoding
< CUPS_ENCODING_VBCS_END
)
331 return (conv_utf8_to_vbcs((cups_sbcs_t
*)dest
, src
, maxout
, encoding
));
338 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
340 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
342 * UTF-32 char UTF-8 char(s)
343 * --------------------------------------------------
344 * 0 to 127 = 0xxxxxxx (US-ASCII)
345 * 128 to 2047 = 110xxxxx 10yyyyyy
346 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
347 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
349 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
350 * which would convert to five- or six-octet UTF-8 sequences...
353 int /* O - Count or -1 on error */
355 cups_utf32_t
*dest
, /* O - Target string */
356 const cups_utf8_t
*src
, /* I - Source string */
357 const int maxout
) /* I - Max output */
359 size_t srclen
; /* Source string length */
360 int i
; /* Looping variable */
361 cups_utf8_t ch
; /* Character value */
362 cups_utf8_t next
; /* Next character value */
363 cups_utf32_t ch32
; /* UTF-32 character value */
367 * Check for valid arguments and clear output...
373 if (!dest
|| !src
|| maxout
< 1 || maxout
> CUPS_MAX_USTRING
)
377 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
381 srclen
= strlen((char *)src
);
383 for (i
= maxout
- 1; *src
&& i
> 0; i
--)
388 * Convert UTF-8 character(s) to UTF-32 character...
394 * One-octet UTF-8 <= 127 (US-ASCII)...
399 else if ((ch
& 0xe0) == 0xc0)
402 * Two-octet UTF-8 <= 2047 (Latin-x)...
409 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
412 * Check for non-shortest form (invalid UTF-8)...
420 else if ((ch
& 0xf0) == 0xe0)
423 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
430 ch32
= ((ch
& 0x0f) << 6) | (next
& 0x3f);
436 ch32
= (ch32
<< 6) | (next
& 0x3f);
439 * Check for non-shortest form (invalid UTF-8)...
447 else if ((ch
& 0xf8) == 0xf0)
450 * Four-octet UTF-8...
457 ch32
= ((ch
& 0x07) << 6) | (next
& 0x3f);
463 ch32
= (ch32
<< 6) | (next
& 0x3f);
469 ch32
= (ch32
<< 6) | (next
& 0x3f);
472 * Check for non-shortest form (invalid UTF-8)...
483 * More than 4-octet (invalid UTF-8 sequence)...
490 * Check for UTF-16 surrogate (illegal UTF-8)...
493 if (*dest
>= 0xd800 && *dest
<= 0xdfff)
504 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
506 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
508 * UTF-32 char UTF-8 char(s)
509 * --------------------------------------------------
510 * 0 to 127 = 0xxxxxxx (US-ASCII)
511 * 128 to 2047 = 110xxxxx 10yyyyyy
512 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
513 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
515 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
516 * which would convert to five- or six-octet UTF-8 sequences...
519 int /* O - Count or -1 on error */
521 cups_utf8_t
*dest
, /* O - Target string */
522 const cups_utf32_t
*src
, /* I - Source string */
523 const int maxout
) /* I - Max output */
525 cups_utf8_t
*start
; /* Start of destination string */
526 int i
; /* Looping variable */
527 int swap
; /* Byte-swap input to output */
528 cups_utf32_t ch
; /* Character value */
532 * Check for valid arguments and clear output...
538 if (!dest
|| !src
|| maxout
< 1)
542 * Check for leading BOM in UTF-32 and inverted BOM...
546 swap
= *src
== 0xfffe0000;
548 if (*src
== 0xfffe0000 || *src
== 0xfeff)
552 * Convert input UTF-32 to output UTF-8...
555 for (i
= maxout
- 1; *src
&& i
> 0;)
560 * Byte swap input UTF-32, if necessary...
561 * (only byte-swapping 24 of 32 bits)
565 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
568 * Check for beyond Plane 16 (invalid UTF-32)...
575 * Convert UTF-32 character to UTF-8 character(s)...
581 * One-octet UTF-8 <= 127 (US-ASCII)...
584 *dest
++ = (cups_utf8_t
)ch
;
590 * Two-octet UTF-8 <= 2047 (Latin-x)...
596 *dest
++ = (cups_utf8_t
)(0xc0 | ((ch
>> 6) & 0x1f));
597 *dest
++ = (cups_utf8_t
)(0x80 | (ch
& 0x3f));
600 else if (ch
< 0x10000)
603 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
609 *dest
++ = (cups_utf8_t
)(0xe0 | ((ch
>> 12) & 0x0f));
610 *dest
++ = (cups_utf8_t
)(0x80 | ((ch
>> 6) & 0x3f));
611 *dest
++ = (cups_utf8_t
)(0x80 | (ch
& 0x3f));
617 * Four-octet UTF-8...
623 *dest
++ = (cups_utf8_t
)(0xf0 | ((ch
>> 18) & 0x07));
624 *dest
++ = (cups_utf8_t
)(0x80 | ((ch
>> 12) & 0x3f));
625 *dest
++ = (cups_utf8_t
)(0x80 | ((ch
>> 6) & 0x3f));
626 *dest
++ = (cups_utf8_t
)(0x80 | (ch
& 0x3f));
633 return ((int)(dest
- start
));
638 * 'compare_wide()' - Compare key for wide (VBCS) match.
642 compare_wide(const void *k1
, /* I - Key char */
643 const void *k2
) /* I - Map char */
645 cups_vbcs_t key
; /* Legacy key character */
646 cups_vbcs_t map
; /* Legacy map character */
649 key
= *((cups_vbcs_t
*)k1
);
650 map
= ((_cups_wide2uni_t
*)k2
)->widechar
;
652 return ((int)(key
- map
));
657 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
660 static int /* O - Count or -1 on error */
662 cups_utf8_t
*dest
, /* O - Target string */
663 const cups_sbcs_t
*src
, /* I - Source string */
664 int maxout
, /* I - Max output */
665 const cups_encoding_t encoding
) /* I - Encoding */
667 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
668 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
669 cups_sbcs_t legchar
; /* Legacy character value */
670 cups_utf32_t work
[CUPS_MAX_USTRING
], /* Internal UCS-4 string */
671 *workptr
; /* Pointer into string */
675 * Find legacy charset map in cache...
678 if ((cmap
= (_cups_cmap_t
*)_cupsCharmapGet(encoding
)) == NULL
)
682 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
686 for (workptr
= work
+ 1; *src
&& workptr
< (work
+ CUPS_MAX_USTRING
- 1);)
691 * Convert ASCII verbatim (optimization)...
695 *workptr
++ = (cups_utf32_t
)legchar
;
699 * Convert unknown character to Replacement Character...
702 crow
= cmap
->char2uni
+ legchar
;
707 *workptr
++ = (cups_utf32_t
)*crow
;
714 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
717 _cupsCharmapFree(encoding
);
719 return (cupsUTF32ToUTF8(dest
, work
, maxout
));
724 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
727 static int /* O - Count or -1 on error */
729 cups_sbcs_t
*dest
, /* O - Target string */
730 const cups_utf8_t
*src
, /* I - Source string */
731 int maxout
, /* I - Max output */
732 const cups_encoding_t encoding
) /* I - Encoding */
734 cups_sbcs_t
*start
; /* Start of destination string */
735 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
736 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
737 cups_utf32_t unichar
; /* Character value */
738 cups_utf32_t work
[CUPS_MAX_USTRING
], /* Internal UCS-4 string */
739 *workptr
; /* Pointer into string */
743 * Find legacy charset map in cache...
746 if ((cmap
= (_cups_cmap_t
*) _cupsCharmapGet(encoding
)) == NULL
)
750 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
753 if (cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
) < 0)
757 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
760 for (workptr
= work
+ 1, start
= dest
; *workptr
&& maxout
> 1; maxout
--)
762 unichar
= *workptr
++;
767 * Convert ASCII verbatim (optimization)...
772 *dest
++ = (cups_sbcs_t
)unichar
;
777 * Convert unknown character to visible replacement...
780 srow
= cmap
->uni2char
[(int)((unichar
>> 8) & 0xff)];
783 srow
+= (int)(unichar
& 0xff);
793 _cupsCharmapFree(encoding
);
795 return ((int)(dest
- start
));
800 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
803 static int /* O - Count or -1 on error */
805 cups_sbcs_t
*dest
, /* O - Target string */
806 const cups_utf8_t
*src
, /* I - Source string */
807 int maxout
, /* I - Max output */
808 const cups_encoding_t encoding
) /* I - Encoding */
810 cups_sbcs_t
*start
; /* Start of destination string */
811 _cups_vmap_t
*vmap
; /* Legacy DBCS / Unicode Charset Map */
812 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
813 cups_utf32_t unichar
; /* Character value */
814 cups_vbcs_t legchar
; /* Legacy character value */
815 cups_utf32_t work
[CUPS_MAX_USTRING
], /* Internal UCS-4 string */
816 *workptr
; /* Pointer into string */
820 * Find legacy charset map in cache...
823 if ((vmap
= (_cups_vmap_t
*)_cupsCharmapGet(encoding
)) == NULL
)
827 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
830 if (cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
) < 0)
834 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
837 for (start
= dest
, workptr
= work
+ 1; *workptr
&& maxout
> 1; maxout
--)
839 unichar
= *workptr
++;
844 * Convert ASCII verbatim (optimization)...
849 *dest
++ = (cups_vbcs_t
)unichar
;
854 * Convert unknown character to visible replacement...
857 vrow
= vmap
->uni2char
[(int)((unichar
>> 8) & 0xff)];
860 vrow
+= (int)(unichar
& 0xff);
863 legchar
= (cups_vbcs_t
)'?';
865 legchar
= (cups_vbcs_t
)*vrow
;
868 * Save n-byte legacy character...
871 if (legchar
> 0xffffff)
876 *dest
++ = (cups_sbcs_t
)(legchar
>> 24);
877 *dest
++ = (cups_sbcs_t
)(legchar
>> 16);
878 *dest
++ = (cups_sbcs_t
)(legchar
>> 8);
879 *dest
++ = (cups_sbcs_t
)legchar
;
883 else if (legchar
> 0xffff)
888 *dest
++ = (cups_sbcs_t
)(legchar
>> 16);
889 *dest
++ = (cups_sbcs_t
)(legchar
>> 8);
890 *dest
++ = (cups_sbcs_t
)legchar
;
894 else if (legchar
> 0xff)
896 *dest
++ = (cups_sbcs_t
)(legchar
>> 8);
897 *dest
++ = (cups_sbcs_t
)legchar
;
905 _cupsCharmapFree(encoding
);
907 return ((int)(dest
- start
));
912 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
915 static int /* O - Count or -1 on error */
917 cups_utf8_t
*dest
, /* O - Target string */
918 const cups_sbcs_t
*src
, /* I - Source string */
919 int maxout
, /* I - Max output */
920 const cups_encoding_t encoding
) /* I - Encoding */
922 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
923 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
924 _cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
925 cups_sbcs_t leadchar
; /* Lead char of n-byte legacy char */
926 cups_vbcs_t legchar
; /* Legacy character value */
927 cups_utf32_t work
[CUPS_MAX_USTRING
], /* Internal UCS-4 string */
928 *workptr
; /* Pointer into string */
932 * Find legacy charset map in cache...
935 if ((vmap
= (_cups_vmap_t
*)_cupsCharmapGet(encoding
)) == NULL
)
939 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
943 for (workptr
= work
+ 1; *src
&& workptr
< (work
+ CUPS_MAX_USTRING
- 1);)
946 leadchar
= (cups_sbcs_t
)legchar
;
949 * Convert ASCII verbatim (optimization)...
954 *workptr
++ = (cups_utf32_t
)legchar
;
959 * Convert 2-byte legacy character...
962 if (vmap
->lead2char
[(int)leadchar
] == leadchar
)
967 legchar
= (legchar
<< 8) | *src
++;
970 * Convert unknown character to Replacement Character...
973 crow
= vmap
->char2uni
[(int)((legchar
>> 8) & 0xff)];
975 crow
+= (int) (legchar
& 0xff);
980 *workptr
++ = (cups_utf32_t
)*crow
;
985 * Fetch 3-byte or 4-byte legacy character...
988 if (vmap
->lead3char
[(int)leadchar
] == leadchar
)
990 if (!*src
|| !src
[1])
993 legchar
= (legchar
<< 8) | *src
++;
994 legchar
= (legchar
<< 8) | *src
++;
996 else if (vmap
->lead4char
[(int)leadchar
] == leadchar
)
998 if (!*src
|| !src
[1] || !src
[2])
1001 legchar
= (legchar
<< 8) | *src
++;
1002 legchar
= (legchar
<< 8) | *src
++;
1003 legchar
= (legchar
<< 8) | *src
++;
1009 * Find 3-byte or 4-byte legacy character...
1012 wide2uni
= (_cups_wide2uni_t
*)bsearch(&legchar
,
1015 sizeof(_cups_wide2uni_t
),
1019 * Convert unknown character to Replacement Character...
1022 if (!wide2uni
|| !wide2uni
->unichar
)
1023 *workptr
++ = 0xfffd;
1025 *workptr
++ = wide2uni
->unichar
;
1030 _cupsCharmapFree(encoding
);
1033 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1036 return (cupsUTF32ToUTF8(dest
, work
, maxout
));
1041 * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
1045 free_sbcs_charmap(_cups_cmap_t
*cmap
) /* I - Character set */
1047 int i
; /* Looping variable */
1050 for (i
= 0; i
< 256; i
++)
1051 if (cmap
->uni2char
[i
])
1052 free(cmap
->uni2char
[i
]);
1059 * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
1063 free_vbcs_charmap(_cups_vmap_t
*vmap
) /* I - Character set */
1065 int i
; /* Looping variable */
1068 for (i
= 0; i
< 256; i
++)
1069 if (vmap
->char2uni
[i
])
1070 free(vmap
->char2uni
[i
]);
1072 for (i
= 0; i
< 256; i
++)
1073 if (vmap
->uni2char
[i
])
1074 free(vmap
->uni2char
[i
]);
1077 free(vmap
->wide2uni
);
1084 * 'get_charmap_count()' - Count lines in a charmap file.
1087 static int /* O - Count or -1 on error */
1088 get_charmap_count(cups_file_t
*fp
) /* I - File to read from */
1090 int count
; /* Number of lines */
1091 char line
[256]; /* Line from input map file */
1095 * Count lines in map input file...
1100 while (cupsFileGets(fp
, line
, sizeof(line
)))
1105 * Return the number of lines...
1116 * 'get_sbcs_charmap()' - Get SBCS Charmap.
1119 static _cups_cmap_t
* /* O - Charmap or 0 on error */
1121 const cups_encoding_t encoding
, /* I - Charmap Encoding */
1122 const char *filename
) /* I - Charmap Filename */
1124 unsigned long legchar
; /* Legacy character value */
1125 cups_utf32_t unichar
; /* Unicode character value */
1126 _cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1127 cups_file_t
*fp
; /* Charset map file pointer */
1128 char *s
; /* Line parsing pointer */
1129 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1130 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
1131 char line
[256]; /* Line from charset map file */
1132 _cups_globals_t
*cg
= _cupsGlobals(); /* Pointer to library globals */
1136 * See if we already have this SBCS charset map loaded...
1139 for (cmap
= cg
->cmap_cache
; cmap
; cmap
= cmap
->next
)
1141 if (cmap
->encoding
== encoding
)
1144 DEBUG_printf((" returning existing cmap=%p\n", cmap
));
1145 return ((void *)cmap
);
1150 * Open SBCS charset map input file...
1153 if ((fp
= cupsFileOpen(filename
, "r")) == NULL
)
1157 * Allocate memory for SBCS charset map...
1160 if ((cmap
= (_cups_cmap_t
*)calloc(1, sizeof(_cups_cmap_t
))) == NULL
)
1163 DEBUG_puts(" Unable to allocate memory!");
1168 cmap
->encoding
= encoding
;
1171 * Save SBCS charset map into memory for transcoding...
1174 while (cupsFileGets(fp
, line
, sizeof(line
)))
1179 legchar
= strtol(line
, &s
, 16);
1180 if (legchar
< 0 || legchar
> 0xff)
1183 unichar
= strtol(s
, NULL
, 16);
1184 if (unichar
< 0 || unichar
> 0xffff)
1188 * Save legacy to Unicode mapping in direct lookup table...
1191 crow
= cmap
->char2uni
+ legchar
;
1192 *crow
= (cups_ucs2_t
)(unichar
& 0xffff);
1195 * Save Unicode to legacy mapping in indirect lookup table...
1198 srow
= cmap
->uni2char
[(unichar
>> 8) & 0xff];
1201 srow
= (cups_sbcs_t
*)calloc(256, sizeof(cups_sbcs_t
));
1205 cmap
->uni2char
[(unichar
>> 8) & 0xff] = srow
;
1208 srow
+= unichar
& 0xff;
1211 * Convert Replacement Character to visible replacement...
1214 if (unichar
== 0xfffd)
1215 legchar
= (unsigned long)'?';
1218 * First (oldest) legacy character uses Unicode mapping cell...
1222 *srow
= (cups_sbcs_t
)legchar
;
1228 * Add it to the cache and return...
1231 cmap
->next
= cg
->cmap_cache
;
1232 cg
->cmap_cache
= cmap
;
1234 DEBUG_printf((" returning new cmap=%p\n", cmap
));
1239 * If we get here, there was an error in the cmap file...
1244 free_sbcs_charmap(cmap
);
1248 DEBUG_puts(" Error, returning NULL!");
1255 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1258 static _cups_vmap_t
* /* O - Charmap or 0 on error */
1260 const cups_encoding_t encoding
, /* I - Charmap Encoding */
1261 const char *filename
) /* I - Charmap Filename */
1263 _cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1264 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1265 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1266 _cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1267 cups_sbcs_t leadchar
; /* Lead char of 2-byte legacy char */
1268 unsigned long legchar
; /* Legacy character value */
1269 cups_utf32_t unichar
; /* Unicode character value */
1270 int mapcount
; /* Count of lines in charmap file */
1271 cups_file_t
*fp
; /* Charset map file pointer */
1272 char *s
; /* Line parsing pointer */
1273 char line
[256]; /* Line from charset map file */
1274 int i
; /* Loop variable */
1275 int wide
; /* 32-bit legacy char */
1276 _cups_globals_t
*cg
= _cupsGlobals(); /* Pointer to library globals */
1279 DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
1280 encoding
, filename
));
1283 * See if we already have this DBCS/VBCS charset map loaded...
1286 for (vmap
= cg
->vmap_cache
; vmap
; vmap
= vmap
->next
)
1288 if (vmap
->encoding
== encoding
)
1291 DEBUG_printf((" returning existing vmap=%p\n", vmap
));
1292 return ((void *)vmap
);
1297 * Open VBCS charset map input file...
1300 if ((fp
= cupsFileOpen(filename
, "r")) == NULL
)
1302 DEBUG_printf((" Unable to open file: %s\n", strerror(errno
)));
1307 * Count lines in charmap file...
1310 if ((mapcount
= get_charmap_count(fp
)) <= 0)
1312 DEBUG_puts(" Unable to get charmap count!");
1316 DEBUG_printf((" mapcount=%d\n", mapcount
));
1319 * Allocate memory for DBCS/VBCS charset map...
1322 if ((vmap
= (_cups_vmap_t
*)calloc(1, sizeof(_cups_vmap_t
))) == NULL
)
1325 DEBUG_puts(" Unable to allocate memory!");
1330 vmap
->encoding
= encoding
;
1333 * Save DBCS/VBCS charset map into memory for transcoding...
1344 while (cupsFileGets(fp
, line
, sizeof(line
)))
1349 legchar
= strtoul(line
, &s
, 16);
1350 if (legchar
== ULONG_MAX
)
1353 unichar
= strtol(s
, NULL
, 16);
1354 if (unichar
< 0 || unichar
> 0xffff)
1359 /* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
1360 legchar, (unsigned)unichar)); */
1363 * Save lead char of 2/3/4-byte legacy char...
1366 if (legchar
> 0xff && legchar
<= 0xffff)
1368 leadchar
= (cups_sbcs_t
)(legchar
>> 8);
1369 vmap
->lead2char
[leadchar
] = leadchar
;
1372 if (legchar
> 0xffff && legchar
<= 0xffffff)
1374 leadchar
= (cups_sbcs_t
)(legchar
>> 16);
1375 vmap
->lead3char
[leadchar
] = leadchar
;
1378 if (legchar
> 0xffffff)
1380 leadchar
= (cups_sbcs_t
)(legchar
>> 24);
1381 vmap
->lead4char
[leadchar
] = leadchar
;
1385 * Save Legacy to Unicode mapping...
1388 if (legchar
<= 0xffff)
1391 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1394 crow
= vmap
->char2uni
[(int)leadchar
];
1397 crow
= (cups_ucs2_t
*)calloc(256, sizeof(cups_ucs2_t
));
1401 vmap
->char2uni
[(int)leadchar
] = crow
;
1404 crow
[(int)(legchar
& 0xff)] = (cups_ucs2_t
)unichar
;
1409 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1415 vmap
->widecount
= (mapcount
- i
+ 1);
1416 wide2uni
= (_cups_wide2uni_t
*)calloc(vmap
->widecount
,
1417 sizeof(_cups_wide2uni_t
));
1421 vmap
->wide2uni
= wide2uni
;
1424 wide2uni
->widechar
= (cups_vbcs_t
)legchar
;
1425 wide2uni
->unichar
= (cups_ucs2_t
)unichar
;
1430 * Save Unicode to legacy mapping in indirect lookup table...
1433 vrow
= vmap
->uni2char
[(int)((unichar
>> 8) & 0xff)];
1436 vrow
= (cups_vbcs_t
*)calloc(256, sizeof(cups_vbcs_t
));
1440 vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = vrow
;
1443 vrow
+= (int)(unichar
& 0xff);
1446 * Convert Replacement Character to visible replacement...
1449 if (unichar
== 0xfffd)
1450 legchar
= (unsigned long)'?';
1453 * First (oldest) legacy character uses Unicode mapping cell...
1457 *vrow
= (cups_vbcs_t
)legchar
;
1460 vmap
->charcount
= (i
- vmap
->widecount
);
1465 * Add it to the cache and return...
1468 vmap
->next
= cg
->vmap_cache
;
1469 cg
->vmap_cache
= vmap
;
1471 DEBUG_printf((" returning new vmap=%p\n", vmap
));
1476 * If we get here, the file contains errors...
1481 free_vbcs_charmap(vmap
);
1485 DEBUG_puts(" Error, returning NULL!");
1492 * End of "$Id: transcode.c 5300 2006-03-17 19:50:14Z mike $"