/*
- * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
+ * Transcoding support for CUPS.
*
- * Transcoding support for CUPS.
+ * Copyright 2007-2014 by Apple Inc.
+ * Copyright 1997-2007 by Easy Software Products.
*
- * Copyright 2007-2010 by Apple Inc.
- * Copyright 1997-2007 by Easy Software Products.
- *
- * These coded instructions, statements, and computer programs are the
- * property of Apple Inc. and are protected by Federal copyright
- * law. Distribution and use rights are outlined in the file "LICENSE.txt"
- * which should have been included with this file. If this file is
- * file is missing or damaged, see the license at "http://www.cups.org/".
- *
- * This file is subject to the Apple OS-Developed Software exception.
- *
- * Contents:
- *
- * _cupsCharmapFlush() - Flush all character set maps out of cache.
- * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
- * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
- * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
- * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
+ * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
*/
/*
*/
#include "cups-private.h"
+#include "debug-internal.h"
#include <limits.h>
#include <time.h>
#ifdef HAVE_ICONV_H
const cups_encoding_t encoding) /* I - Encoding */
{
cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
- int bytes; /* Number of bytes converted */
+#ifdef HAVE_ICONV_H
size_t srclen, /* Length of source string */
outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
* Check for valid arguments...
*/
- DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
- dest, src, maxout, encoding));
+ DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)", (void *)dest, src, maxout, encoding));
if (!dest || !src || maxout < 1)
{
if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy((char *)dest, src, maxout);
+ strlcpy((char *)dest, src, (size_t)maxout);
return ((int)strlen((char *)dest));
}
if (ch & 128)
{
- *destptr++ = 0xc0 | (ch >> 6);
- *destptr++ = 0x80 | (ch & 0x3f);
+ *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6));
+ *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
}
else
- *destptr++ = ch;
+ *destptr++ = (cups_utf8_t)ch;
}
*destptr = '\0';
if (map_encoding != encoding)
{
+ char toset[1024]; /* Destination character set */
+
_cupsCharmapFlush();
+ snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
+
+ map_encoding = encoding;
map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
- map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
- map_encoding = encoding;
+ map_to_utf8 = iconv_open("UTF-8", toset);
}
if (map_to_utf8 != (iconv_t)-1)
{
+ char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */
+
srclen = strlen(src);
- outBytesLeft = maxout - 1;
- bytes = (int)iconv(map_to_utf8, (char **)&src, &srclen,
- (char **)&destptr, &outBytesLeft);
- *destptr = '\0';
+ outBytesLeft = (size_t)maxout - 1;
+
+ iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft);
+ *altdestptr = '\0';
_cupsMutexUnlock(&map_mutex);
- return ((int)(destptr - dest));
+ return ((int)(altdestptr - (char *)dest));
}
_cupsMutexUnlock(&map_mutex);
const cups_encoding_t encoding) /* I - Encoding */
{
char *destptr; /* Pointer into destination */
- int bytes; /* Number of bytes converted */
+#ifdef HAVE_ICONV_H
size_t srclen, /* Length of source string */
outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
* Handle identity conversions...
*/
- if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
+ if (encoding == CUPS_UTF8 ||
encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy(dest, (char *)src, maxout);
+ strlcpy(dest, (char *)src, (size_t)maxout);
return ((int)strlen(dest));
}
destptr = dest;
- if (encoding == CUPS_ISO8859_1)
+ if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII)
{
- int ch; /* Character from string */
+ int ch, /* Character from string */
+ maxch; /* Maximum character for charset */
char *destend; /* End of ISO-8859-1 buffer */
-
+ maxch = encoding == CUPS_ISO8859_1 ? 256 : 128;
destend = dest + maxout - 1;
while (*src && destptr < destend)
{
ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
- if (ch < 256)
- *destptr++ = ch;
+ if (ch < maxch)
+ *destptr++ = (char)ch;
else
*destptr++ = '?';
}
(ch & 0xf8) == 0xf0)
*destptr++ = '?';
else if (!(ch & 0x80))
- *destptr++ = ch;
+ *destptr++ = (char)ch;
}
*destptr = '\0';
if (map_encoding != encoding)
{
+ char toset[1024]; /* Destination character set */
+
_cupsCharmapFlush();
- map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
- map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
+ snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
+
map_encoding = encoding;
+ map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
+ map_to_utf8 = iconv_open("UTF-8", toset);
}
if (map_from_utf8 != (iconv_t)-1)
{
+ char *altsrc = (char *)src; /* Silence bogus GCC type-punned */
+
srclen = strlen((char *)src);
- outBytesLeft = maxout - 1;
- bytes = (int)iconv(map_from_utf8, (char **)&src, &srclen,
- &destptr, &outBytesLeft);
- *destptr = '\0';
+ outBytesLeft = (size_t)maxout - 1;
+
+ iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft);
+ *destptr = '\0';
_cupsMutexUnlock(&map_mutex);
* Check for valid arguments and clear output...
*/
- DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
- src, maxout));
+ DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", (void *)dest, src, maxout));
if (dest)
*dest = 0;
return (-1);
}
- ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
return (-1);
}
- ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
return (-1);
}
- ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
* Check for valid arguments and clear output...
*/
- DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
- maxout));
+ DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", (void *)dest, (void *)src, maxout));
if (dest)
*dest = '\0';
return ((int)(dest - start));
}
-
-
-/*
- * 'compare_wide()' - Compare key for wide (VBCS) match.
- *
- * Comparison callback handed to bsearch() by conv_vbcs_to_utf8() when it
- * searches the 'wide2uni' table. "k1" points at the cups_vbcs_t key being
- * looked up and "k2" points at one _cups_wide2uni_t table entry; the result
- * is the difference between the key and that entry's 'widechar' member,
- * converted to int.
- *
- * NOTE(review): if cups_vbcs_t is an unsigned 32-bit type (its definition is
- * not visible here), the subtraction wraps and the int conversion can report
- * the wrong sign for values that are far apart - confirm before reuse.
- */
-
-static int
-compare_wide(const void *k1, /* I - Pointer to key (cups_vbcs_t) */
- const void *k2) /* I - Pointer to map entry (_cups_wide2uni_t) */
-{
- cups_vbcs_t key; /* Legacy key character */
- cups_vbcs_t map; /* Legacy map character */
-
-
- key = *((cups_vbcs_t *)k1);
- map = ((_cups_wide2uni_t *)k2)->widechar;
-
- return ((int)(key - map));
-}
-
-
-/*
- * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
- *
- * Looks up the character map for "encoding" with get_charmap(), expands each
- * source byte into a UCS-4 value in a local work buffer, then hands that
- * buffer to cupsUTF32ToUTF8() and returns whatever that call returns.
- * Returns -1 when no character map can be obtained.
- */
-
-static int /* O - Count or -1 on error */
-conv_sbcs_to_utf8(
- cups_utf8_t *dest, /* O - Target string */
- const cups_sbcs_t *src, /* I - Source string */
- int maxout, /* I - Max output, passed through to cupsUTF32ToUTF8() */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_sbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
- return (-1);
-
- /*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
- *
- * Slot 0 holds the BOM (0xfeff) and the last slot is kept for the
- * terminator, so input beyond CUPS_MAX_USTRING - 2 characters is
- * silently dropped by the loop bound.
- */
-
- work[0] = 0xfeff;
- for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
- {
- legchar = *src++;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (legchar < 0x80)
- *workptr++ = (cups_utf32_t)legchar;
- else
- {
- /*
- * Convert unknown character to Replacement Character...
- *
- * 'char2uni' is indexed directly by the legacy byte; a zero cell
- * means "no mapping" and becomes U+FFFD.
- */
-
- crow = cmap->char2uni + legchar;
-
- if (!*crow)
- *workptr++ = 0xfffd;
- else
- *workptr++ = (cups_utf32_t)*crow;
- }
- }
-
- *workptr = 0;
-
- /*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
- *
- * The map's 'used' counter, incremented when get_charmap() returned the
- * map, is released first. The BOM removal is not visible in this
- * function - presumably cupsUTF32ToUTF8() drops it; verify there.
- */
-
- cmap->used --;
-
- return (cupsUTF32ToUTF8(dest, work, maxout));
-}
-
-
-/*
- * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
- *
- * Looks up the character map for "encoding" with get_charmap(), converts
- * "src" to UCS-4 in a local work buffer with cupsUTF8ToUTF32(), then maps
- * each UCS-4 value to one legacy byte through the map's 'uni2char' table.
- * Characters with no mapping are written as '?'. Returns the number of
- * bytes written (not counting the terminator) or -1 when the map or the
- * UTF-32 conversion is unavailable.
- */
-
-static int /* O - Count or -1 on error */
-conv_utf8_to_sbcs(
- cups_sbcs_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- cups_sbcs_t *start; /* Start of destination string */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
- return (-1);
-
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- *
- * NOTE(review): the error return below leaves without the
- * "cmap->used --" done on the success path, although the map's 'used'
- * counter was incremented when it was handed out - looks like a
- * reference-count leak; confirm.
- */
-
- if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
- return (-1);
-
- /*
- * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
- *
- * One byte is written per iteration and "maxout" is decremented per
- * iteration, so up to "maxout" bytes can be written before the
- * terminator is stored after the loop.
- *
- * NOTE(review): in that case the terminator lands at dest[maxout] -
- * confirm callers pass a "maxout" smaller than the buffer size. No BOM
- * skipping is visible in this loop; whether 'work' starts with a BOM
- * depends on cupsUTF8ToUTF32() - verify.
- *
- * The "!unichar" test inside the loop repeats the "*workptr" test in
- * the loop condition.
- */
-
- for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
- {
- unichar = *workptr++;
- if (!unichar)
- break;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (unichar < 0x80)
- {
- *dest++ = (cups_sbcs_t)unichar;
- continue;
- }
-
- /*
- * Convert unknown character to visible replacement...
- *
- * 'uni2char' is a two-level table: bits 8-15 of the code point pick
- * the row (which may be NULL) and bits 0-7 pick the cell. Bits above
- * 15 are ignored, so a code point above 0xffff is looked up as if it
- * were its low 16 bits.
- */
-
- srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
-
- if (srow)
- srow += (int)(unichar & 0xff);
-
- if (!srow || !*srow)
- *dest++ = '?';
- else
- *dest++ = *srow;
- }
-
- *dest = '\0';
-
- cmap->used --;
-
- return ((int)(dest - start));
-}
-
-
-/*
- * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
- *
- * Looks up the character map for "encoding" with get_charmap(), converts
- * "src" to UCS-4 in a local work buffer with cupsUTF8ToUTF32(), then maps
- * each UCS-4 value through the map's 'uni2char' table to a legacy character
- * that is written most-significant byte first as 1, 2, 3 or 4 bytes
- * depending on its magnitude. Characters with no mapping are written as
- * '?'. Returns the number of bytes written (not counting the terminator)
- * or -1 on error.
- *
- * NOTE(review): none of the "return (-1)" paths taken after the map has
- * been obtained perform the "vmap->used --" done on the success path -
- * looks like a reference-count leak; confirm.
- */
-
-static int /* O - Count or -1 on error */
-conv_utf8_to_vbcs(
- cups_sbcs_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- cups_sbcs_t *start; /* Start of destination string */
- _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- cups_vbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
- "encoding=%d)", dest, src, maxout, encoding));
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
- {
- DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
-
- return (-1);
- }
-
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- */
-
- if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
- {
- DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
-
- return (-1);
- }
-
- /*
- * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
- *
- * "maxout" is used as the count of output bytes still available: the
- * loop header subtracts one per character and the multi-byte branches
- * subtract the extra bytes they wrote. No BOM skipping is visible in
- * this loop; whether 'work' starts with a BOM depends on
- * cupsUTF8ToUTF32() - verify.
- */
-
- for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
- {
- unichar = *workptr++;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (unichar < 0x80)
- {
- *dest++ = (cups_sbcs_t)unichar;
-
- DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
- dest[-1]));
-
- continue;
- }
-
- /*
- * Convert unknown character to visible replacement...
- *
- * 'uni2char' is a two-level table: bits 8-15 of the code point pick
- * the row (which may be NULL) and bits 0-7 pick the cell. Bits above
- * 15 are ignored by this lookup.
- */
-
- vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
-
- if (vrow)
- vrow += (int)(unichar & 0xff);
-
- if (!vrow || !*vrow)
- legchar = (cups_vbcs_t)'?';
- else
- legchar = (cups_vbcs_t)*vrow;
-
- /*
- * Save n-byte legacy character...
- *
- * The 4-byte and 3-byte branches require room for the character plus
- * the terminator (maxout of at least 5 and 4 respectively) and return
- * -1 otherwise.
- *
- * NOTE(review): the 2-byte branch has no equivalent space check, so
- * with maxout == 1 it still writes two bytes and drives "maxout"
- * to -1 after the loop decrement - confirm whether that is intended.
- */
-
- if (legchar > 0xffffff)
- {
- if (maxout < 5)
- {
- DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
-
- return (-1);
- }
-
- *dest++ = (cups_sbcs_t)(legchar >> 24);
- *dest++ = (cups_sbcs_t)(legchar >> 16);
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout -= 3;
-
- DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
- (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
- }
- else if (legchar > 0xffff)
- {
- if (maxout < 4)
- {
- DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
-
- return (-1);
- }
-
- *dest++ = (cups_sbcs_t)(legchar >> 16);
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout -= 2;
-
- DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
- (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
- }
- else if (legchar > 0xff)
- {
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout --;
-
- DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
- (unsigned)unichar, dest[-2], dest[-1]));
- }
- else
- {
- *dest++ = (cups_sbcs_t)legchar;
-
- DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
- (unsigned)unichar, dest[-1]));
- }
- }
-
- *dest = '\0';
-
- vmap->used --;
-
- DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
- (int)(dest - start)));
-
- return ((int)(dest - start));
-}
-
-
-/*
- * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
- *
- * Looks up the character map for "encoding" with get_charmap(), decodes the
- * source bytes into UCS-4 values in a local work buffer (1-byte ASCII,
- * 2-byte via the 'char2uni' table, 3- and 4-byte via a bsearch() of the
- * 'wide2uni' table), then hands the buffer to cupsUTF32ToUTF8() and returns
- * whatever that call returns. Returns -1 when no map is available, when
- * the string ends inside a multi-byte character, or when a byte is not a
- * known lead byte.
- *
- * NOTE(review): the "return (-1)" paths inside the loop leave without the
- * "vmap->used --" done on the success path - looks like a reference-count
- * leak; confirm.
- */
-
-static int /* O - Count or -1 on error */
-conv_vbcs_to_utf8(
- cups_utf8_t *dest, /* O - Target string */
- const cups_sbcs_t *src, /* I - Source string */
- int maxout, /* I - Max output, passed through to cupsUTF32ToUTF8() */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
- cups_vbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
- dest, src, maxout, encoding));
-
- if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
- {
- DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
-
- return (-1);
- }
-
- /*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
- *
- * Slot 0 holds the BOM (0xfeff) and the last slot is kept for the
- * terminator, so decoding stops once CUPS_MAX_USTRING - 2 characters
- * have been produced.
- */
-
- work[0] = 0xfeff;
- for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
- {
- legchar = *src++;
- leadchar = (cups_sbcs_t)legchar;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (legchar < 0x80)
- {
- *workptr++ = (cups_utf32_t)legchar;
-
- DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
- (unsigned)legchar));
- continue;
- }
-
- /*
- * Convert 2-byte legacy character...
- *
- * A byte is treated as a 2-byte lead when the 'lead2char' table holds
- * that same byte value at its own index.
- */
-
- if (vmap->lead2char[(int)leadchar] == leadchar)
- {
- if (!*src)
- {
- DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
-
- /*
- * Convert unknown character to Replacement Character...
- *
- * 'char2uni' is a two-level table: the lead byte picks the row
- * (which may be NULL) and the trail byte picks the cell; a missing
- * row or zero cell becomes U+FFFD.
- */
-
- crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
- if (crow)
- crow += (int) (legchar & 0xff);
-
- if (!crow || !*crow)
- *workptr++ = 0xfffd;
- else
- *workptr++ = (cups_utf32_t)*crow;
-
- DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
- src[-2], src[-1], (unsigned)workptr[-1]));
- continue;
- }
-
- /*
- * Fetch 3-byte or 4-byte legacy character...
- *
- * The trail bytes are checked for a premature NUL before they are
- * consumed; a byte that is not a 2-, 3- or 4-byte lead is an error.
- */
-
- if (vmap->lead3char[(int)leadchar] == leadchar)
- {
- if (!*src || !src[1])
- {
- DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- }
- else if (vmap->lead4char[(int)leadchar] == leadchar)
- {
- if (!*src || !src[1] || !src[2])
- {
- DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- }
- else
- {
- DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
-
- return (-1);
- }
-
- /*
- * Find 3-byte or 4-byte legacy character...
- *
- * bsearch() requires 'wide2uni' to be ordered consistently with
- * compare_wide(); get_vbcs_charmap() stores entries in file order
- * without sorting, so this presumably relies on the map file being
- * sorted - verify.
- */
-
- wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
- vmap->wide2uni,
- vmap->widecount,
- sizeof(_cups_wide2uni_t),
- compare_wide);
-
- /*
- * Convert unknown character to Replacement Character...
- */
-
- if (!wide2uni || !wide2uni->unichar)
- *workptr++ = 0xfffd;
- else
- *workptr++ = wide2uni->unichar;
-
- if (vmap->lead3char[(int)leadchar] == leadchar)
- DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
- src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
- else
- DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
- src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
- }
-
- *workptr = 0;
-
- vmap->used --;
-
- DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
- (int)(workptr - work)));
-
- /*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
- *
- * The BOM removal is not visible in this function - presumably
- * cupsUTF32ToUTF8() drops it; verify there.
- */
-
- return (cupsUTF32ToUTF8(dest, work, maxout));
-}
-
-
-/*
- * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
- *
- * Frees every allocated row of the 256-entry 'uni2char' table and then the
- * map structure itself. The map is not unlinked from any list here, so
- * the caller must not leave it reachable afterwards.
- */
-
-static void
-free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
-{
- int i; /* Looping variable */
-
-
- for (i = 0; i < 256; i ++)
- if (cmap->uni2char[i])
- free(cmap->uni2char[i]);
-
- free(cmap);
-}
-
-
-/*
- * 'free_vbcs_charmap()' - Free memory used by a variable byte character set.
- *
- * Frees every allocated row of the 256-entry 'char2uni' and 'uni2char'
- * tables, the 'wide2uni' array if one was allocated, and then the map
- * structure itself. The map is not unlinked from any list here, so the
- * caller must not leave it reachable afterwards.
- */
-
-static void
-free_vbcs_charmap(_cups_vmap_t *vmap) /* I - Character set */
-{
- int i; /* Looping variable */
-
-
- for (i = 0; i < 256; i ++)
- if (vmap->char2uni[i])
- free(vmap->char2uni[i]);
-
- for (i = 0; i < 256; i ++)
- if (vmap->uni2char[i])
- free(vmap->uni2char[i]);
-
- if (vmap->wide2uni)
- free(vmap->wide2uni);
-
- free(vmap);
-}
-
-
-/*
- * 'get_charmap()' - Lookup or get a character set map (private).
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
- *
- * Builds the map filename "<cups_datadir>/charmaps/<encoding name>.txt"
- * and dispatches on the encoding value: encodings below
- * CUPS_ENCODING_SBCS_END go to get_sbcs_charmap() (result is a
- * _cups_cmap_t), encodings below CUPS_ENCODING_VBCS_END go to
- * get_vbcs_charmap() (result is a _cups_vmap_t), anything else yields
- * NULL. The caller has to cast the result to the matching map type.
- */
-
-
-static void * /* O - Charset map pointer or NULL */
-get_charmap(
- const cups_encoding_t encoding) /* I - Encoding */
-{
- char filename[1024]; /* Filename for charset map file */
- _cups_globals_t *cg = _cupsGlobals(); /* Global data */
-
-
- DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
-
- /*
- * Get the data directory and charset map name...
- */
-
- snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
- cg->cups_datadir, _cupsEncodingName(encoding));
-
- DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
-
- /*
- * Read charset map input file into cache...
- */
-
- if (encoding < CUPS_ENCODING_SBCS_END)
- return (get_sbcs_charmap(encoding, filename));
- else if (encoding < CUPS_ENCODING_VBCS_END)
- return (get_vbcs_charmap(encoding, filename));
- else
- return (NULL);
-}
-
-
-/*
- * 'get_charmap_count()' - Count lines in a charmap file.
- *
- * Reads "fp" line by line until cupsFileGets() stops returning data and
- * counts only the lines whose first character is '0' - the same test the
- * map loaders use to recognize a mapping line. The file position is left
- * wherever reading stopped, so a caller that wants to parse the file
- * afterwards has to rewind it (get_vbcs_charmap() does).
- */
-
-static int /* O - Count or -1 on error */
-get_charmap_count(cups_file_t *fp) /* I - File to read from */
-{
- int count; /* Number of lines */
- char line[256]; /* Line from input map file */
-
-
- /*
- * Count lines in map input file...
- */
-
- count = 0;
-
- while (cupsFileGets(fp, line, sizeof(line)))
- if (line[0] == '0')
- count ++;
-
- /*
- * Return the number of lines...
- *
- * A file with no mapping lines at all is reported as an error (-1).
- */
-
- if (count > 0)
- return (count);
- else
- return (-1);
-}
-
-
-/*
- * 'get_sbcs_charmap()' - Get SBCS Charmap.
- *
- * Returns the cached map for "encoding" from the 'cmap_cache' list if one
- * exists; otherwise opens "filename", parses every line that starts with
- * '0' as a "<legacy hex> <unicode hex>" pair, fills the map's 'char2uni'
- * (direct) and 'uni2char' (two-level) tables, pushes the new map on the
- * front of 'cmap_cache' and returns it. In both cases the map's 'used'
- * counter is incremented before it is returned. Returns NULL if the file
- * cannot be opened, memory cannot be allocated, or a line fails the range
- * checks.
- */
-
-static _cups_cmap_t * /* O - Charmap or 0 on error */
-get_sbcs_charmap(
- const cups_encoding_t encoding, /* I - Charmap Encoding */
- const char *filename) /* I - Charmap Filename */
-{
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_file_t *fp; /* Charset map file pointer */
- char *s; /* Line parsing pointer */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- char line[256]; /* Line from charset map file */
-
-
- /*
- * See if we already have this SBCS charset map loaded...
- */
-
- DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
- filename));
-
- for (cmap = cmap_cache; cmap; cmap = cmap->next)
- {
- if (cmap->encoding == encoding)
- {
- cmap->used ++;
- DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
-
- return ((void *)cmap);
- }
- }
-
- /*
- * Open SBCS charset map input file...
- */
-
- if ((fp = cupsFileOpen(filename, "r")) == NULL)
- {
- DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
-
- return (NULL);
- }
-
- /*
- * Allocate memory for SBCS charset map...
- *
- * calloc() zero-fills the structure, so every table cell starts out as
- * "no mapping" and every 'uni2char' row pointer starts out NULL.
- */
-
- if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
- {
- cupsFileClose(fp);
- DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
-
- return (NULL);
- }
-
- cmap->used ++;
- cmap->encoding = encoding;
-
- /*
- * Save SBCS charset map into memory for transcoding...
- *
- * Only lines beginning with '0' are parsed; everything else is skipped.
- *
- * NOTE(review): "legchar" is unsigned long, so its "< 0" test can never
- * be true; a negative strtol() result is still rejected, but only by
- * the "> 0xff" test after conversion to unsigned. The "unichar < 0"
- * test is likewise dead if cups_utf32_t is unsigned (definition not
- * visible here) - confirm.
- */
-
- while (cupsFileGets(fp, line, sizeof(line)))
- {
- if (line[0] != '0')
- continue;
-
- legchar = strtol(line, &s, 16);
- if (legchar < 0 || legchar > 0xff)
- goto sbcs_error;
-
- unichar = strtol(s, NULL, 16);
- if (unichar < 0 || unichar > 0x10ffff)
- goto sbcs_error;
-
- /*
- * Save legacy to Unicode mapping in direct lookup table...
- *
- * NOTE(review): the range check above admits code points up to
- * 0x10ffff, but only the low 16 bits are stored here.
- */
-
- crow = cmap->char2uni + legchar;
- *crow = (cups_ucs2_t)(unichar & 0xffff);
-
- /*
- * Save Unicode to legacy mapping in indirect lookup table...
- *
- * Rows are allocated on first use and indexed by bits 8-15 of the
- * code point; bits above 15 are ignored by this index.
- */
-
- srow = cmap->uni2char[(unichar >> 8) & 0xff];
- if (!srow)
- {
- srow = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
- if (!srow)
- goto sbcs_error;
-
- cmap->uni2char[(unichar >> 8) & 0xff] = srow;
- }
-
- srow += unichar & 0xff;
-
- /*
- * Convert Replacement Character to visible replacement...
- */
-
- if (unichar == 0xfffd)
- legchar = (unsigned long)'?';
-
- /*
- * First (oldest) legacy character uses Unicode mapping cell...
- *
- * A cell that is already non-zero is left alone, so when several
- * legacy characters map to the same code point the earliest line in
- * the file wins.
- */
-
- if (!*srow)
- *srow = (cups_sbcs_t)legchar;
- }
-
- cupsFileClose(fp);
-
- /*
- * Add it to the cache and return...
- */
-
- cmap->next = cmap_cache;
- cmap_cache = cmap;
-
- DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
-
- return (cmap);
-
- /*
- * If we get here, there was an error in the cmap file...
- *
- * The partially built map has not been linked into 'cmap_cache' yet, so
- * it can simply be freed.
- */
-
- sbcs_error:
-
- free_sbcs_charmap(cmap);
-
- cupsFileClose(fp);
-
- DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
-
- return (NULL);
-}
-
-
-/*
- * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
- *
- * Returns the cached map for "encoding" from the 'vmap_cache' list if one
- * exists; otherwise opens "filename", counts its mapping lines, rewinds,
- * and parses every line that starts with '0' as a
- * "<legacy hex> <unicode hex>" pair. Legacy values up to 0xffff go into
- * the two-level 'char2uni' table, larger ones are appended to the
- * 'wide2uni' array, and every pair is also entered in the two-level
- * 'uni2char' table. The new map is pushed on the front of 'vmap_cache'.
- * In both cases the map's 'used' counter is incremented before it is
- * returned. Returns NULL if the file cannot be opened, contains no
- * mapping lines, memory cannot be allocated, or a line fails the range
- * checks.
- */
-
-static _cups_vmap_t * /* O - Charmap or 0 on error */
-get_vbcs_charmap(
- const cups_encoding_t encoding, /* I - Charmap Encoding */
- const char *filename) /* I - Charmap Filename */
-{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- int mapcount; /* Count of lines in charmap file */
- cups_file_t *fp; /* Charset map file pointer */
- char *s; /* Line parsing pointer */
- char line[256]; /* Line from charset map file */
- int i; /* Number of mapping lines parsed so far */
- int legacy; /* Non-zero once 'wide2uni' has been allocated */
-
-
- DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
- encoding, filename));
-
- /*
- * See if we already have this DBCS/VBCS charset map loaded...
- */
-
- for (vmap = vmap_cache; vmap; vmap = vmap->next)
- {
- if (vmap->encoding == encoding)
- {
- vmap->used ++;
- DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
-
- return ((void *)vmap);
- }
- }
-
- /*
- * Open VBCS charset map input file...
- */
-
- if ((fp = cupsFileOpen(filename, "r")) == NULL)
- {
- DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
-
- return (NULL);
- }
-
- /*
- * Count lines in charmap file...
- *
- * The count is needed up front to size the 'wide2uni' array; the file
- * is rewound again before parsing.
- */
-
- if ((mapcount = get_charmap_count(fp)) <= 0)
- {
- DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
-
- cupsFileClose(fp);
-
- return (NULL);
- }
-
- DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
-
- /*
- * Allocate memory for DBCS/VBCS charset map...
- *
- * calloc() zero-fills the structure, so every lead-byte cell starts out
- * zero and every row pointer starts out NULL.
- */
-
- if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
- {
- DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
-
- cupsFileClose(fp);
-
- return (NULL);
- }
-
- vmap->used ++;
- vmap->encoding = encoding;
-
- /*
- * Save DBCS/VBCS charset map into memory for transcoding...
- *
- * Only lines beginning with '0' are parsed; everything else is skipped.
- *
- * NOTE(review): the "unichar < 0" test below is dead if cups_utf32_t is
- * unsigned (definition not visible here) - confirm.
- */
-
- wide2uni = NULL;
-
- cupsFileRewind(fp);
-
- i = 0;
- legacy = 0;
-
- while (cupsFileGets(fp, line, sizeof(line)))
- {
- if (line[0] != '0')
- continue;
-
- legchar = strtoul(line, &s, 16);
- if (legchar == ULONG_MAX)
- goto vbcs_error;
-
- unichar = strtol(s, NULL, 16);
- if (unichar < 0 || unichar > 0x10ffff)
- goto vbcs_error;
-
- i ++;
-
- DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
- legchar, (unsigned)unichar));
-
- /*
- * Save lead char of 2/3/4-byte legacy char...
- *
- * Each lead-byte table stores the byte value at its own index, which
- * is what conv_vbcs_to_utf8() tests for. A single-byte legacy value
- * (0xff or less) takes the last branch with a lead byte of 0.
- */
-
- if (legchar > 0xffffff)
- {
- leadchar = (cups_sbcs_t)(legchar >> 24);
- vmap->lead4char[leadchar] = leadchar;
- }
- else if (legchar > 0xffff)
- {
- leadchar = (cups_sbcs_t)(legchar >> 16);
- vmap->lead3char[leadchar] = leadchar;
- }
- else
- {
- leadchar = (cups_sbcs_t)(legchar >> 8);
- vmap->lead2char[leadchar] = leadchar;
- }
-
- /*
- * Save Legacy to Unicode mapping...
- */
-
- if (legchar <= 0xffff)
- {
- /*
- * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
- *
- * Rows are allocated on first use, one per lead byte.
- *
- * NOTE(review): the code point is narrowed to cups_ucs2_t here
- * although the range check above admits values up to 0x10ffff.
- */
-
- crow = vmap->char2uni[(int)leadchar];
- if (!crow)
- {
- crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
- if (!crow)
- goto vbcs_error;
-
- vmap->char2uni[(int)leadchar] = crow;
- }
-
- crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
- }
- else
- {
- /*
- * Save VBCS 32-bit to Unicode mapping in sorted list table...
- *
- * The array is allocated when the first wide character is seen and
- * sized for the current line plus every mapping line that follows
- * it (mapcount - i + 1), which is an upper bound on the number of
- * wide entries. Entries are appended in file order with no sort.
- *
- * NOTE(review): bsearch() in conv_vbcs_to_utf8() searches all
- * 'widecount' slots, so this presumably expects the wide characters
- * to be the sorted tail of the file - verify against the map files.
- */
-
- if (!legacy)
- {
- legacy = 1;
- vmap->widecount = (mapcount - i + 1);
- wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
- sizeof(_cups_wide2uni_t));
- if (!wide2uni)
- goto vbcs_error;
-
- vmap->wide2uni = wide2uni;
- }
-
- wide2uni->widechar = (cups_vbcs_t)legchar;
- wide2uni->unichar = (cups_ucs2_t)unichar;
- wide2uni ++;
- }
-
- /*
- * Save Unicode to legacy mapping in indirect lookup table...
- *
- * Rows are allocated on first use and indexed by bits 8-15 of the
- * code point; bits above 15 are ignored by this index.
- */
-
- vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
- if (!vrow)
- {
- vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
- if (!vrow)
- goto vbcs_error;
-
- vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
- }
-
- vrow += (int)(unichar & 0xff);
-
- /*
- * Convert Replacement Character to visible replacement...
- */
-
- if (unichar == 0xfffd)
- legchar = (unsigned long)'?';
-
- /*
- * First (oldest) legacy character uses Unicode mapping cell...
- *
- * A cell that is already non-zero is left alone, so when several
- * legacy characters map to the same code point the earliest line in
- * the file wins.
- */
-
- if (!*vrow)
- *vrow = (cups_vbcs_t)legchar;
- }
-
- vmap->charcount = (i - vmap->widecount);
-
- cupsFileClose(fp);
-
- /*
- * Add it to the cache and return...
- */
-
- vmap->next = vmap_cache;
- vmap_cache = vmap;
-
- DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
-
- return (vmap);
-
- /*
- * If we get here, the file contains errors...
- *
- * The partially built map has not been linked into 'vmap_cache' yet, so
- * it can simply be freed; 'vmap->wide2uni' was set as soon as the array
- * was allocated, so free_vbcs_charmap() releases it as well.
- */
-
- vbcs_error:
-
- free_vbcs_charmap(vmap);
-
- cupsFileClose(fp);
-
- DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
-
- return (NULL);
-}
-
-
-/*
- * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
- */