]>
git.ipfire.org Git - thirdparty/cups.git/blob - cups/transcode.c
05d404bbc0bbd00b5298e8ee316a1ac41ff67abb
4 * Transcoding support for the Common UNIX Printing System (CUPS).
6 * Copyright 1997-2005 by Easy Software Products.
8 * These coded instructions, statements, and computer programs are
9 * the property of Easy Software Products and are protected by Federal
10 * copyright law. Distribution and use rights are outlined in the
11 * file "LICENSE.txt" which should have been included with this file.
12 * If this file is missing or damaged please contact Easy Software
15 * Attn: CUPS Licensing Information
16 * Easy Software Products
17 * 44141 Airport View Drive, Suite 204
18 * Hollywood, Maryland 20636 USA
20 * Voice: (301) 373-9600
21 * EMail: cups-info@cups.org
22 * WWW: http://www.cups.org
26 * cupsCharmapGet() - Get a character set map.
27 * cupsCharmapFree() - Free a character set map.
28 * cupsCharmapFlush() - Flush all character set maps out of cache.
29 * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
30 * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
31 * cupsUTF8ToUTF16() - Convert UTF-8 to UTF-16.
32 * cupsUTF16ToUTF8() - Convert UTF-16 to UTF-8.
33 * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
34 * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
35 * cupsUTF16ToUTF32() - Convert UTF-16 to UTF-32.
36 * cupsUTF32ToUTF16() - Convert UTF-32 to UTF-16.
37 * get_charmap_count() - Count lines in a charmap file.
38 * get_sbcs_charmap() - Get SBCS Charmap.
39 * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
40 * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
41 * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
42 * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
43 * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
44 * compare_wide() - Compare key for wide (VBCS) match.
48 * Include necessary headers...
60 #include "transcode.h"
67 static cups_cmap_t
*cmap_cache
= NULL
; /* SBCS Charmap Cache */
68 static cups_vmap_t
*vmap_cache
= NULL
; /* VBCS Charmap Cache */
74 static int get_charmap_count(const char *filename
);
75 static cups_cmap_t
*get_sbcs_charmap(const cups_encoding_t encoding
,
76 const char *filename
);
77 static cups_vmap_t
*get_vbcs_charmap(const cups_encoding_t encoding
,
78 const char *filename
);
80 static int conv_utf8_to_sbcs(char *dest
,
81 const cups_utf8_t
*src
,
83 const cups_encoding_t encoding
);
84 static int conv_utf8_to_vbcs(char *dest
,
85 const cups_utf8_t
*src
,
87 const cups_encoding_t encoding
);
89 static int conv_sbcs_to_utf8(cups_utf8_t
*dest
,
92 const cups_encoding_t encoding
);
93 static int conv_vbcs_to_utf8(cups_utf8_t
*dest
,
96 const cups_encoding_t encoding
);
98 static int compare_wide(const void *k1
, const void *k2
);
101 * 'cupsCharmapGet()' - Get a character set map.
103 * This code handles single-byte (SBCS), double-byte (DBCS), and
104 * variable-byte (VBCS) character sets _without_ charset escapes...
105 * This code does not handle multiple-byte character sets (MBCS)
106 * (such as ISO-2022-JP) with charset switching via escapes...
108 void * /* O - Charset map pointer */
109 cupsCharmapGet(const cups_encoding_t encoding
)
112 char *datadir
; /* CUPS_DATADIR environment variable */
113 char mapname
[80]; /* Name of charset map */
114 char filename
[256]; /* Filename for charset map file */
117 * Check for valid arguments...
119 if ((encoding
< 0) || (encoding
>= CUPS_ENCODING_VBCS_END
))
123 * Get the data directory and charset map name...
125 if ((datadir
= getenv("CUPS_DATADIR")) == NULL
)
126 datadir
= CUPS_DATADIR
;
127 snprintf(mapname
, sizeof(mapname
), "%s.txt", cupsEncodingName(encoding
));
128 snprintf(filename
, sizeof(filename
), "%s/charmaps/%s",
132 * Read charset map input file into cache...
134 if (encoding
< CUPS_ENCODING_SBCS_END
)
135 return (get_sbcs_charmap(encoding
, filename
));
136 else if (encoding
< CUPS_ENCODING_VBCS_END
)
137 return (get_vbcs_charmap(encoding
, filename
));
143 * 'cupsCharmapFree()' - Free a character set map.
145 * This does not actually free; use 'cupsCharmapFlush()' for that.
148 cupsCharmapFree(const cups_encoding_t encoding
)
151 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
152 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
155 * See if we already have this SBCS charset map loaded...
157 for (cmap
= cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
159 if (cmap
->encoding
== encoding
)
168 * See if we already have this DBCS/VBCS charset map loaded...
170 for (vmap
= vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
172 if (vmap
->encoding
== encoding
)
183 * 'cupsCharmapFlush()' - Flush all character set maps out of cache.
186 cupsCharmapFlush(void)
188 int i
; /* Looping variable */
189 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
190 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
191 cups_cmap_t
*cnext
; /* Next Legacy SBCS Charset Map */
192 cups_vmap_t
*vnext
; /* Next Legacy VBCS Charset Map */
193 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
194 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
195 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
198 * Loop through SBCS charset map cache, free all memory...
200 for (cmap
= cmap_cache
; cmap
!= NULL
; cmap
= cnext
)
202 for (i
= 0; i
< 256; i
++)
204 if ((srow
= cmap
->uni2char
[i
]) != NULL
)
213 * Loop through DBCS/VBCS charset map cache, free all memory...
215 for (vmap
= vmap_cache
; vmap
!= NULL
; vmap
= vnext
)
217 for (i
= 0; i
< 256; i
++)
219 if ((crow
= vmap
->char2uni
[i
]) != NULL
)
222 for (i
= 0; i
< 256; i
++)
224 if ((vrow
= vmap
->uni2char
[i
]) != NULL
)
228 free(vmap
->wide2uni
);
237 * 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
239 * This code handles single-byte (SBCS), double-byte (DBCS), and
240 * variable-byte (VBCS) character sets _without_ charset escapes...
241 * This code does not handle multiple-byte character sets (MBCS)
242 * (such as ISO-2022-JP) with charset switching via escapes...
244 int /* O - Count or -1 on error */
245 cupsUTF8ToCharset(char *dest
, /* O - Target string */
246 const cups_utf8_t
*src
, /* I - Source string */
247 const int maxout
, /* I - Max output */
248 const cups_encoding_t encoding
) /* I - Encoding */
251 * Check for valid arguments...
256 || (maxout
> CUPS_MAX_USTRING
)
258 || (encoding
== CUPS_UTF8
)
259 || (encoding
>= CUPS_ENCODING_VBCS_END
))
263 * Convert input UTF-8 to legacy charset...
265 if (encoding
< CUPS_ENCODING_SBCS_END
)
266 return (conv_utf8_to_sbcs(dest
, src
, maxout
, encoding
));
267 else if (encoding
< CUPS_ENCODING_VBCS_END
)
268 return (conv_utf8_to_vbcs(dest
, src
, maxout
, encoding
));
274 * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
276 * This code handles single-byte (SBCS), double-byte (DBCS), and
277 * variable-byte (VBCS) character sets _without_ charset escapes...
278 * This code does not handle multiple-byte character sets (MBCS)
279 * (such as ISO-2022-JP) with charset switching via escapes...
281 int /* O - Count or -1 on error */
282 cupsCharsetToUTF8(cups_utf8_t
*dest
, /* O - Target string */
283 const char *src
, /* I - Source string */
284 const int maxout
, /* I - Max output */
285 const cups_encoding_t encoding
) /* I - Encoding */
288 * Check for valid arguments...
293 || (maxout
> CUPS_MAX_USTRING
)
295 || (encoding
== CUPS_UTF8
)
296 || (encoding
>= CUPS_ENCODING_VBCS_END
))
300 * Convert input legacy charset to UTF-8...
302 if (encoding
< CUPS_ENCODING_SBCS_END
)
303 return (conv_sbcs_to_utf8(dest
, src
, maxout
, encoding
));
304 else if (encoding
< CUPS_ENCODING_VBCS_END
)
305 return (conv_vbcs_to_utf8(dest
, src
, maxout
, encoding
));
311 * 'cupsUTF8ToUTF16()' - Convert UTF-8 to UTF-16.
313 * This code does not support Unicode beyond 16-bits (Plane 0)...
315 int /* O - Count or -1 on error */
316 cupsUTF8ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
317 const cups_utf8_t
*src
, /* I - Source string */
318 const int maxout
) /* I - Max output */
320 int worklen
; /* Internal UCS-4 string length */
321 cups_utf32_t work
[CUPS_MAX_USTRING
];
322 /* Internal UCS-4 string */
325 * Check for valid arguments and clear output...
330 || (maxout
> CUPS_MAX_USTRING
))
335 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
337 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
342 * Convert internal UCS-4 to output UTF-16...
344 worklen
= cupsUTF32ToUTF16(dest
, work
, maxout
);
349 * 'cupsUTF16ToUTF8()' - Convert UTF-16 to UTF-8.
351 * This code does not support Unicode beyond 16-bits (Plane 0)...
353 int /* O - Count or -1 on error */
354 cupsUTF16ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
355 const cups_utf16_t
*src
, /* I - Source string */
356 const int maxout
) /* I - Max output */
358 int worklen
; /* Internal UCS-4 string length */
359 cups_utf32_t work
[CUPS_MAX_USTRING
];
360 /* Internal UCS-4 string */
363 * Check for valid arguments and clear output...
368 || (maxout
> CUPS_MAX_USTRING
))
373 * Convert input UTF-16 to internal UCS-4 (and byte-swap)...
375 worklen
= cupsUTF16ToUTF32(work
, src
, CUPS_MAX_USTRING
);
380 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
382 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
387 * 'cupsUTF8ToUTF32()' - Convert UTF-8 to UTF-32.
389 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
391 * UTF-32 char UTF-8 char(s)
392 * --------------------------------------------------
393 * 0 to 127 = 0xxxxxxx (US-ASCII)
394 * 128 to 2047 = 110xxxxx 10yyyyyy
395 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
396 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
398 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
399 * which would convert to five- or six-octet UTF-8 sequences...
401 * This code does not support Unicode beyond 16-bits (Plane 0)...
403 int /* O - Count or -1 on error */
404 cupsUTF8ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
405 const cups_utf8_t
*src
, /* I - Source string */
406 const int maxout
) /* I - Max output */
408 cups_utf8_t
*first
= (cups_utf8_t
*) src
;
409 int srclen
; /* Source string length */
410 int i
; /* Looping variable */
411 cups_utf32_t ch
; /* Character value */
412 cups_utf32_t next
; /* Next character value */
413 cups_utf32_t ch32
; /* UTF-32 character value */
416 * Check for valid arguments and clear output...
421 || (maxout
> CUPS_MAX_USTRING
))
426 * Convert input UTF-8 to output UTF-32 (and insert BOM)...
430 srclen
= strlen((char *) src
);
431 for (i
= 1; i
< (maxout
- 1); src
++, dest
++)
433 ch
= (cups_utf32_t
) *src
;
440 * Convert UTF-8 character(s) to UTF-32 character...
442 if ((ch
& 0x7f) == ch
)
445 * One-octet UTF-8 <= 127 (US-ASCII)...
449 else if ((ch
& 0xe0) == 0xc0)
452 * Two-octet UTF-8 <= 2047 (Latin-x)...
455 next
= (cups_utf32_t
) *src
;
459 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
462 * Check for non-shortest form (invalid UTF-8)...
468 else if ((ch
& 0xf0) == 0xe0)
471 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
474 next
= (cups_utf32_t
) *src
;
478 ch32
= ((ch
& 0x1f) << 6) | (next
& 0x3f);
480 next
= (cups_utf32_t
) *src
;
484 ch32
= ((ch32
<< 6) | (next
& 0x3f));
487 * Check for non-shortest form (invalid UTF-8)...
493 else if ((ch
& 0xf8) == 0xf0)
496 * Four-octet UTF-8 to Replacement Character...
498 if (((src
- first
) + 3) >= srclen
)
503 else if ((ch
& 0xfc) == 0xf8)
506 * Five-octet UTF-8 (invalid strict UTF-32)...
510 else if ((ch
& 0xfe) == 0xfc)
513 * Six-octet UTF-8 (invalid strict UTF-32)...
520 * More than six-octet (invalid UTF-8 sequence)...
526 * Check for UTF-16 surrogate (illegal UTF-8)...
528 if ((*dest
>= 0xd800) && (*dest
<= 0xdfff))
532 * Check for beyond Plane 16 (invalid UTF-8)...
534 if (*dest
> 0x10ffff)
542 * 'cupsUTF32ToUTF8()' - Convert UTF-32 to UTF-8.
544 * 32-bit UTF-32 (actually 21-bit) maps to UTF-8 as follows...
546 * UTF-32 char UTF-8 char(s)
547 * --------------------------------------------------
548 * 0 to 127 = 0xxxxxxx (US-ASCII)
549 * 128 to 2047 = 110xxxxx 10yyyyyy
550 * 2048 to 65535 = 1110xxxx 10yyyyyy 10zzzzzz
551 * > 65535 = 11110xxx 10yyyyyy 10zzzzzz 10xxxxxx
553 * UTF-32 prohibits chars beyond Plane 16 (> 0x10ffff) in UCS-4,
554 * which would convert to five- or six-octet UTF-8 sequences...
556 * This code does not support Unicode beyond 16-bits (Plane 0)...
558 int /* O - Count or -1 on error */
559 cupsUTF32ToUTF8(cups_utf8_t
*dest
, /* O - Target string */
560 const cups_utf32_t
*src
, /* I - Source string */
561 const int maxout
) /* I - Max output */
563 cups_utf32_t
*first
= (cups_utf32_t
*) src
;
564 /* First source char */
565 cups_utf8_t
*start
= dest
; /* Start of destination string */
566 int i
; /* Looping variable */
567 int swap
= 0; /* Byte-swap input to output */
568 cups_utf32_t ch
; /* Character value */
571 * Check for valid arguments and clear output...
580 * Check for leading BOM in UTF-32 and inverted BOM...
582 if (*src
== 0xfffe0000)
586 * Convert input UTF-32 to output UTF-8...
588 for (i
= 0; i
< (maxout
- 1); src
++)
595 * Byte swap input UTF-32, if necessary...
598 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
601 * Check for leading BOM (and delete from output)...
603 if ((src
== first
) && (ch
== 0xfeff))
607 * Check for beyond Plane 16 (invalid UTF-32)...
613 * Convert beyond Plane 0 (BMP) to Replacement Character...
619 * Convert UTF-32 character to UTF-8 character(s)...
624 * One-octet UTF-8 <= 127 (US-ASCII)...
626 *dest
= (cups_utf8_t
) ch
;
630 else if (ch
<= 0x7ff)
633 * Two-octet UTF-8 <= 2047 (Latin-x)...
635 if (i
> (maxout
- 2))
637 *dest
= (cups_utf8_t
) (0xc0 | ((ch
>> 6) & 0x1f));
640 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
647 * Three-octet UTF-8 <= 65535 (Plane 0 - BMP)...
649 if (i
> (maxout
- 3))
651 *dest
= (cups_utf8_t
) (0xe0 | ((ch
>> 12) & 0x0f));
654 *dest
= (cups_utf8_t
) (0x80 | ((ch
>> 6) & 0x3f));
657 *dest
= (cups_utf8_t
) (0x80 | (ch
& 0x3f));
663 i
= (int) (dest
- start
);
668 * 'cupsUTF16ToUTF32()' - Convert UTF-16 to UTF-32.
670 * This code does not support Unicode beyond 16-bits (Plane 0)...
672 int /* O - Count or -1 on error */
673 cupsUTF16ToUTF32(cups_utf32_t
*dest
, /* O - Target string */
674 const cups_utf16_t
*src
, /* I - Source string */
675 const int maxout
) /* I - Max output */
677 int i
; /* Looping variable */
678 int swap
= 0; /* Byte-swap input to output */
679 int surrogate
= 0; /* Expecting low-half surrogate */
680 cups_utf32_t ch
; /* Character value */
683 * Check for valid arguments and clear output...
688 || (maxout
> CUPS_MAX_USTRING
))
693 * Check for leading BOM in UTF-16 and inverted BOM...
699 * Convert input UTF-16 to output UTF-32...
701 for (i
= 0; i
< (maxout
- 1); src
++)
703 ch
= (cups_utf32_t
) (*src
& 0xffff);
709 * Byte swap input UTF-16, if necessary...
712 ch
= (cups_utf32_t
) ((ch
<< 8) | (ch
>> 8));
715 * Discard expected UTF-16 low-half surrogate...
717 if ((ch
>= 0xdc00) && (ch
<= 0xdfff))
726 * Convert UTF-16 high-half surrogate to Replacement Character...
728 if ((ch
>= 0xd800) && (ch
<= 0xdbff))
743 * 'cupsUTF32ToUTF16()' - Convert UTF-32 to UTF-16.
745 * This code does not support Unicode beyond 16-bits (Plane 0)...
747 int /* O - Count or -1 on error */
748 cupsUTF32ToUTF16(cups_utf16_t
*dest
, /* O - Target string */
749 const cups_utf32_t
*src
, /* I - Source string */
750 const int maxout
) /* I - Max output */
752 int i
; /* Looping variable */
753 int swap
= 0; /* Byte-swap input to output */
754 cups_utf32_t ch
; /* Character value */
757 * Check for valid arguments and clear output...
762 || (maxout
> CUPS_MAX_USTRING
))
767 * Check for leading BOM in UTF-32 and inverted BOM...
769 if (*src
== 0xfffe0000)
773 * Convert input UTF-32 to output UTF-16 (w/out surrogate pairs)...
775 for (i
= 0; i
< (maxout
- 1); src
++, dest
++)
783 * Byte swap input UTF-32, if necessary...
786 ch
= ((ch
>> 24) | ((ch
>> 8) & 0xff00) | ((ch
<< 8) & 0xff0000));
789 * Check for UTF-16 surrogate (illegal UTF-32)...
791 if ((ch
>= 0xd800) && (ch
<= 0xdfff))
795 * Check for beyond Plane 16 (invalid UTF-32)...
801 * Convert beyond Plane 0 (BMP) to Replacement Character...
805 *dest
= (cups_utf16_t
) ch
;
812 * 'get_charmap_count()' - Count lines in a charmap file.
814 static int /* O - Count or -1 on error */
815 get_charmap_count(const char *filename
) /* I - Charmap Filename */
817 int i
; /* Looping variable */
818 FILE *fp
; /* Map input file pointer */
819 char *s
; /* Line parsing pointer */
820 char line
[256]; /* Line from input map file */
821 cups_utf32_t unichar
; /* Unicode character value */
824 * Open map input file...
826 if ((filename
== NULL
) || (*filename
== '\0'))
828 fp
= fopen(filename
, "r");
833 * Count lines in map input file...
835 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
837 s
= fgets(&line
[0], sizeof(line
), fp
);
840 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
842 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
844 while ((*s
== ' ') || (*s
== '\t'))
846 if (strncmp (s
, "0x", 2) == 0)
848 if ((sscanf(s
, "%lx", &unichar
) != 1)
849 || (unichar
> 0xffff))
860 * Close file and return charmap count (non-comment line count)...
867 * 'get_sbcs_charmap()' - Get SBCS Charmap.
869 static cups_cmap_t
* /* O - Charmap or 0 on error */
870 get_sbcs_charmap(const cups_encoding_t encoding
,
871 /* I - Charmap Encoding */
872 const char *filename
) /* I - Charmap Filename */
874 int i
; /* Loop variable */
875 unsigned long legchar
; /* Legacy character value */
876 cups_utf32_t unichar
; /* Unicode character value */
877 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
878 FILE *fp
; /* Charset map file pointer */
879 char *s
; /* Line parsing pointer */
880 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
881 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
882 char line
[256]; /* Line from charset map file */
885 * Check for valid arguments...
887 if ((encoding
< 0) || (filename
== NULL
))
891 * See if we already have this SBCS charset map loaded...
893 for (cmap
= cmap_cache
; cmap
!= NULL
; cmap
= cmap
->next
)
895 if (cmap
->encoding
== encoding
)
898 return ((void *) cmap
);
903 * Open SBCS charset map input file...
905 fp
= fopen(filename
, "r");
910 * Allocate memory for SBCS charset map and add to cache...
912 cmap
= (cups_cmap_t
*) calloc(1, sizeof(cups_cmap_t
));
918 cmap
->next
= cmap_cache
;
921 cmap
->encoding
= encoding
;
924 * Save SBCS charset map into memory for transcoding...
926 for (i
= 0; i
< CUPS_MAX_CHARMAP_LINES
;)
928 s
= fgets(&line
[0], sizeof(line
), fp
);
931 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
933 if (strncmp (s
, "0x", 2) == 0)
935 if ((sscanf(s
, "%lx", &legchar
) != 1)
942 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
944 while ((*s
== ' ') || (*s
== '\t'))
946 if (strncmp (s
, "0x", 2) == 0)
948 if (sscanf(s
, "%lx", &unichar
) != 1)
957 * Convert beyond Plane 0 (BMP) to Replacement Character...
959 if (unichar
> 0xffff)
963 * Save legacy to Unicode mapping in direct lookup table...
965 crow
= &cmap
->char2uni
[(int) legchar
];
966 *crow
= (cups_ucs2_t
) (unichar
& 0xffff);
969 * Save Unicode to legacy mapping in indirect lookup table...
971 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
974 srow
= (cups_sbcs_t
*) calloc(256, sizeof(cups_sbcs_t
));
981 cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = srow
;
983 srow
+= (int) (unichar
& 0xff);
986 * Convert Replacement Character to visible replacement...
988 if (unichar
== 0xfffd)
989 legchar
= (unsigned long) '?';
992 * First (oldest) legacy character uses Unicode mapping cell...
995 *srow
= (cups_sbcs_t
) legchar
;
1002 * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
1004 static cups_vmap_t
* /* O - Charmap or 0 on error */
1005 get_vbcs_charmap(const cups_encoding_t encoding
,
1006 /* I - Charmap Encoding */
1007 const char *filename
) /* I - Charmap Filename */
1009 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1010 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1011 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1012 cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1013 cups_sbcs_t leadchar
; /* Lead char of 2-byte legacy char */
1014 unsigned long legchar
; /* Legacy character value */
1015 cups_utf32_t unichar
; /* Unicode character value */
1016 int mapcount
; /* Count of lines in charmap file */
1017 FILE *fp
; /* Charset map file pointer */
1018 char *s
; /* Line parsing pointer */
1019 char line
[256]; /* Line from charset map file */
1020 int i
; /* Loop variable */
1021 int wide
; /* 32-bit legacy char */
1024 * Check for valid arguments...
1026 if ((encoding
< 0) || (filename
== NULL
))
1030 * See if we already have this DBCS/VBCS charset map loaded...
1032 for (vmap
= vmap_cache
; vmap
!= NULL
; vmap
= vmap
->next
)
1034 if (vmap
->encoding
== encoding
)
1037 return ((void *) vmap
);
1042 * Count lines in charmap file...
1044 mapcount
= get_charmap_count(filename
);
1049 * Open VBCS charset map input file...
1051 fp
= fopen(filename
, "r");
1056 * Allocate memory for DBCS/VBCS charset map and add to cache...
1058 vmap
= (cups_vmap_t
*) calloc(1, sizeof(cups_vmap_t
));
1064 vmap
->next
= vmap_cache
;
1067 vmap
->encoding
= encoding
;
1070 * Save DBCS/VBCS charset map into memory for transcoding...
1075 for (i
= 0, wide
= 0; i
< mapcount
; )
1077 s
= fgets(&line
[0], sizeof(line
), fp
);
1080 if ((*s
== '#') || (*s
== '\n') || (*s
== '\0'))
1082 if (strncmp (s
, "0x", 2) == 0)
1084 if ((sscanf(s
, "%lx", &legchar
) != 1)
1085 || ((legchar
> 0xffff) && (encoding
< CUPS_ENCODING_DBCS_END
)))
1091 while ((*s
!= 0) && (*s
!= ' ') && (*s
!= '\t'))
1093 while ((*s
== ' ') || (*s
== '\t'))
1095 if (strncmp (s
, "0x", 2) == 0)
1097 if (sscanf(s
, "%lx", &unichar
) != 1)
1106 * Convert beyond Plane 0 (BMP) to Replacement Character...
1108 if (unichar
> 0xffff)
1112 * Save lead char of 2/3/4-byte legacy char...
1114 if ((legchar
> 0xff) && (legchar
<= 0xffff))
1116 leadchar
= (cups_sbcs_t
) (legchar
>> 8);
1117 vmap
->lead2char
[leadchar
] = leadchar
;
1119 if ((legchar
> 0xffff) && (legchar
<= 0xffffff))
1121 leadchar
= (cups_sbcs_t
) (legchar
>> 16);
1122 vmap
->lead3char
[leadchar
] = leadchar
;
1124 if (legchar
> 0xffffff)
1126 leadchar
= (cups_sbcs_t
) (legchar
>> 24);
1127 vmap
->lead4char
[leadchar
] = leadchar
;
1131 * Save Legacy to Unicode mapping...
1133 if (legchar
<= 0xffff)
1136 * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
1138 crow
= vmap
->char2uni
[(int) leadchar
];
1141 crow
= (cups_ucs2_t
*) calloc(256, sizeof(cups_ucs2_t
));
1148 vmap
->char2uni
[(int) leadchar
] = crow
;
1150 crow
+= (int) (legchar
& 0xff);
1151 *crow
= (cups_vbcs_t
) unichar
;
1156 * Save VBCS 32-bit to Unicode mapping in sorted list table...
1161 vmap
->widecount
= (mapcount
- i
+ 1);
1162 wide2uni
= (cups_wide2uni_t
*)
1163 calloc(vmap
->widecount
, sizeof(cups_wide2uni_t
));
1164 if (wide2uni
== NULL
)
1170 vmap
->wide2uni
= wide2uni
;
1172 wide2uni
->widechar
= (cups_vbcs_t
) legchar
;
1173 wide2uni
->unichar
= unichar
;
1178 * Save Unicode to legacy mapping in indirect lookup table...
1180 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1183 vrow
= (cups_vbcs_t
*) calloc(256, sizeof(cups_vbcs_t
));
1190 vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)] = vrow
;
1192 vrow
+= (int) (unichar
& 0xff);
1195 * Convert Replacement Character to visible replacement...
1197 if (unichar
== 0xfffd)
1198 legchar
= (unsigned long) '?';
1201 * First (oldest) legacy character uses Unicode mapping cell...
1204 *vrow
= (cups_vbcs_t
) legchar
;
1206 vmap
->charcount
= (i
- vmap
->widecount
);
1212 * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
1214 static int /* O - Count or -1 on error */
1215 conv_utf8_to_sbcs(char *dest
, /* O - Target string */
1216 const cups_utf8_t
*src
, /* I - Source string */
1217 const int maxout
, /* I - Max output */
1218 const cups_encoding_t encoding
) /* I - Encoding */
1220 char *start
= dest
; /* Start of destination string */
1221 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1222 cups_sbcs_t
*srow
; /* Pointer to SBCS row in 'uni2char' */
1223 cups_utf32_t unichar
; /* Character value */
1224 int worklen
; /* Internal UCS-4 string length */
1225 cups_utf32_t work
[CUPS_MAX_USTRING
];
1226 /* Internal UCS-4 string */
1227 int i
; /* Looping variable */
1230 * Check for valid arguments and clear output...
1235 || (maxout
> CUPS_MAX_USTRING
)
1236 || (encoding
== CUPS_UTF8
))
1241 * Find legacy charset map in cache...
1243 cmap
= (cups_cmap_t
*) cupsCharmapGet(encoding
);
1248 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1250 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1255 * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
1257 for (i
= 0; i
< worklen
;)
1265 * Check for leading BOM (and delete from output)...
1267 if ((i
== 1) && (unichar
== 0xfeff))
1271 * Convert ASCII verbatim (optimization)...
1273 if (unichar
<= 0x7f)
1275 *dest
= (char) unichar
;
1281 * Convert unknown character to visible replacement...
1283 srow
= cmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1285 srow
+= (int) (unichar
& 0xff);
1286 if ((srow
== NULL
) || (*srow
== 0))
1289 *dest
= (char) (*srow
);
1293 worklen
= (int) (dest
- start
);
1294 cupsCharmapFree(encoding
);
1299 * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
1301 static int /* O - Count or -1 on error */
1302 conv_utf8_to_vbcs(char *dest
, /* O - Target string */
1303 const cups_utf8_t
*src
, /* I - Source string */
1304 const int maxout
, /* I - Max output */
1305 const cups_encoding_t encoding
) /* I - Encoding */
1307 char *start
= dest
; /* Start of destination string */
1308 cups_vmap_t
*vmap
; /* Legacy DBCS / Unicode Charset Map */
1309 cups_vbcs_t
*vrow
; /* Pointer to VBCS row in 'uni2char' */
1310 cups_utf32_t unichar
; /* Character value */
1311 cups_vbcs_t legchar
; /* Legacy character value */
1312 int worklen
; /* Internal UCS-4 string length */
1313 cups_utf32_t work
[CUPS_MAX_USTRING
];
1314 /* Internal UCS-4 string */
1315 int i
; /* Looping variable */
1318 * Check for valid arguments and clear output...
1323 || (maxout
> CUPS_MAX_USTRING
)
1324 || (encoding
== CUPS_UTF8
))
1329 * Find legacy charset map in cache...
1331 vmap
= (cups_vmap_t
*) cupsCharmapGet(encoding
);
1336 * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
1338 worklen
= cupsUTF8ToUTF32(work
, src
, CUPS_MAX_USTRING
);
1343 * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
1345 for (i
= 0; i
< worklen
;)
1353 * Check for leading BOM (and delete from output)...
1355 if ((i
== 1) && (unichar
== 0xfeff))
1359 * Convert ASCII verbatim (optimization)...
1361 if (unichar
<= 0x7f)
1363 *dest
= (char) unichar
;
1369 * Convert unknown character to visible replacement...
1371 vrow
= vmap
->uni2char
[(int) ((unichar
>> 8) & 0xff)];
1373 vrow
+= (int) (unichar
& 0xff);
1374 if ((vrow
== NULL
) || (*vrow
== 0))
1375 legchar
= (cups_vbcs_t
) '?';
1377 legchar
= (cups_vbcs_t
) *vrow
;
1380 * Save n-byte legacy character...
1382 if (legchar
> 0xffffff)
1384 *dest
= (char) ((legchar
>> 24) & 0xff);
1387 if (legchar
> 0xffff)
1389 *dest
= (char) ((legchar
>> 16) & 0xff);
1394 *dest
= (char) ((legchar
>> 8) & 0xff);
1397 *dest
= (char) (legchar
& 0xff);
1401 worklen
= (int) (dest
- start
);
1402 cupsCharmapFree(encoding
);
1407 * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
1409 static int /* O - Count or -1 on error */
1410 conv_sbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1411 const char *src
, /* I - Source string */
1412 const int maxout
, /* I - Max output */
1413 const cups_encoding_t encoding
) /* I - Encoding */
1415 cups_cmap_t
*cmap
; /* Legacy SBCS / Unicode Charset Map */
1416 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1417 unsigned long legchar
; /* Legacy character value */
1418 cups_utf32_t unichar
; /* Unicode character value */
1419 int worklen
; /* Internal UCS-4 string length */
1420 cups_utf32_t work
[CUPS_MAX_USTRING
];
1421 /* Internal UCS-4 string */
1422 int i
; /* Looping variable */
1425 * Check for valid arguments and clear output...
1430 || (maxout
> CUPS_MAX_USTRING
)
1431 || (encoding
== CUPS_UTF8
))
1436 * Find legacy charset map in cache...
1438 cmap
= (cups_cmap_t
*) cupsCharmapGet(encoding
);
1443 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1446 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1450 legchar
= (unsigned long) *src
;
1453 * Convert ASCII verbatim (optimization)...
1455 if (legchar
<= 0x7f)
1457 work
[i
] = (cups_utf32_t
) legchar
;
1463 * Convert unknown character to Replacement Character...
1465 crow
= &cmap
->char2uni
[0];
1466 crow
+= (int) legchar
;
1470 unichar
= (cups_utf32_t
) *crow
;
1477 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1479 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1480 cupsCharmapFree(encoding
);
1486 * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
1488 static int /* O - Count or -1 on error */
1489 conv_vbcs_to_utf8(cups_utf8_t
*dest
, /* O - Target string */
1490 const char *src
, /* I - Source string */
1491 const int maxout
, /* I - Max output */
1492 const cups_encoding_t encoding
) /* I - Encoding */
1494 cups_vmap_t
*vmap
; /* Legacy VBCS / Unicode Charset Map */
1495 cups_ucs2_t
*crow
; /* Pointer to UCS-2 row in 'char2uni' */
1496 cups_wide2uni_t
*wide2uni
; /* Pointer to row in 'wide2uni' */
1497 cups_sbcs_t leadchar
; /* Lead char of n-byte legacy char */
1498 cups_vbcs_t legchar
; /* Legacy character value */
1499 cups_utf32_t unichar
; /* Unicode character value */
1500 int i
; /* Looping variable */
1501 int worklen
; /* Internal UCS-4 string length */
1502 cups_utf32_t work
[CUPS_MAX_USTRING
];
1503 /* Internal UCS-4 string */
1506 * Check for valid arguments and clear output...
1511 || (maxout
> CUPS_MAX_USTRING
)
1512 || (encoding
== CUPS_UTF8
))
1517 * Find legacy charset map in cache...
1519 vmap
= (cups_vmap_t
*) cupsCharmapGet(encoding
);
1524 * Convert input legacy charset to internal UCS-4 (and insert BOM)...
1527 for (i
= 1; i
< (CUPS_MAX_USTRING
- 1); src
++)
1531 legchar
= (cups_vbcs_t
) *src
;
1532 leadchar
= (cups_sbcs_t
) *src
;
1535 * Convert ASCII verbatim (optimization)...
1537 if (legchar
<= 0x7f)
1539 work
[i
] = (cups_utf32_t
) legchar
;
1545 * Convert 2-byte legacy character...
1547 if (vmap
->lead2char
[(int) leadchar
] == leadchar
)
1552 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1555 * Convert unknown character to Replacement Character...
1557 crow
= vmap
->char2uni
[(int) ((legchar
>> 8) & 0xff)];
1559 crow
+= (int) (legchar
& 0xff);
1560 if ((crow
== NULL
) || (*crow
== 0))
1563 unichar
= (cups_utf32_t
) *crow
;
1570 * Fetch 3-byte or 4-byte legacy character...
1572 if (vmap
->lead3char
[(int) leadchar
] == leadchar
)
1577 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1581 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1583 else if (vmap
->lead4char
[(int) leadchar
] == leadchar
)
1588 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1592 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1596 legchar
= (legchar
<< 8) | (cups_vbcs_t
) *src
;
1602 * Find 3-byte or 4-byte legacy character...
1604 wide2uni
= vmap
->wide2uni
;
1605 wide2uni
= (cups_wide2uni_t
*) bsearch(&legchar
,
1608 sizeof(cups_wide2uni_t
),
1612 * Convert unknown character to Replacement Character...
1614 if ((wide2uni
== NULL
) || (wide2uni
->unichar
== 0))
1617 unichar
= wide2uni
->unichar
;
1624 * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
1626 worklen
= cupsUTF32ToUTF8(dest
, work
, maxout
);
1627 cupsCharmapFree(encoding
);
1632 * 'compare_wide()' - Compare key for wide (VBCS) match.
1635 compare_wide(const void *k1
, /* I - Key char */
1636 const void *k2
) /* I - Map char */
1638 cups_vbcs_t
*kp
= (cups_vbcs_t
*) k1
;
1639 /* Key char pointer */
1640 cups_wide2uni_t
*mp
= (cups_wide2uni_t
*) k2
;
1641 /* Map char pointer */
1642 cups_vbcs_t key
; /* Legacy key character */
1643 cups_vbcs_t map
; /* Legacy map character */
1644 int result
; /* Result Value */
1649 result
= (int) (key
- map
);
1651 result
= -1 * ((int) (map
- key
));