/*
- * "$Id: transcode.c 5373 2006-04-06 20:03:32Z mike $"
+ * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
*
- * Transcoding support for the Common UNIX Printing System (CUPS).
+ * Transcoding support for CUPS.
*
- * Copyright 1997-2006 by Easy Software Products.
+ * Copyright 2007-2010 by Apple Inc.
+ * Copyright 1997-2007 by Easy Software Products.
*
- * These coded instructions, statements, and computer programs are
- * the property of Easy Software Products and are protected by Federal
- * copyright law. Distribution and use rights are outlined in the
- * file "LICENSE.txt" which should have been included with this file.
- * If this file is missing or damaged please contact Easy Software
- * Products at:
+ * These coded instructions, statements, and computer programs are the
+ * property of Apple Inc. and are protected by Federal copyright
+ * law. Distribution and use rights are outlined in the file "LICENSE.txt"
+ * which should have been included with this file. If this file is
+ * missing or damaged, see the license at "http://www.cups.org/".
*
- * Attn: CUPS Licensing Information
- * Easy Software Products
- * 44141 Airport View Drive, Suite 204
- * Hollywood, Maryland 20636 USA
- *
- * Voice: (301) 373-9600
- * EMail: cups-info@cups.org
- * WWW: http://www.cups.org
+ * This file is subject to the Apple OS-Developed Software exception.
*
* Contents:
*
* _cupsCharmapFlush() - Flush all character set maps out of cache.
- * _cupsCharmapFree() - Free a character set map.
- * _cupsCharmapGet() - Get a character set map.
* cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
* cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
* cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
* cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
- * compare_wide() - Compare key for wide (VBCS) match.
- * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
- * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
- * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
- * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
- * free_sbcs_charmap() - Free memory used by a single byte character set.
- * free_vbcs_charmap() - Free memory used by a variable byte character set.
- * get_charmap() - Lookup or get a character set map (private).
- * get_charmap_count() - Count lines in a charmap file.
- * get_sbcs_charmap() - Get SBCS Charmap.
- * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
*/
/*
* Include necessary headers...
*/
-#include "globals.h"
-#include "debug.h"
+#include "cups-private.h"
#include <limits.h>
-#include <stdlib.h>
-#include <errno.h>
#include <time.h>
+#ifdef HAVE_ICONV_H
+# include <iconv.h>
+#endif /* HAVE_ICONV_H */
/*
* Local globals...
*/
-#ifdef HAVE_PTHREAD_H
-static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
+#ifdef HAVE_ICONV_H
+static _cups_mutex_t map_mutex = _CUPS_MUTEX_INITIALIZER;
/* Mutex to control access to maps */
-#endif /* HAVE_PTHREAD_H */
-static _cups_cmap_t *cmap_cache = NULL;
- /* SBCS Charmap Cache */
-static _cups_vmap_t *vmap_cache = NULL;
- /* VBCS Charmap Cache */
-
-
-/*
- * Local functions...
- */
-
-static int compare_wide(const void *k1, const void *k2);
-static int conv_sbcs_to_utf8(cups_utf8_t *dest,
- const cups_sbcs_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
- const cups_utf8_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
- const cups_utf8_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_vbcs_to_utf8(cups_utf8_t *dest,
- const cups_sbcs_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static void free_sbcs_charmap(_cups_cmap_t *sbcs);
-static void free_vbcs_charmap(_cups_vmap_t *vbcs);
-static void *get_charmap(const cups_encoding_t encoding);
-static int get_charmap_count(cups_file_t *fp);
-static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
-static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
+static iconv_t map_from_utf8 = (iconv_t)-1;
+ /* Convert from UTF-8 to charset */
+static iconv_t map_to_utf8 = (iconv_t)-1;
+ /* Convert from charset to UTF-8 */
+static cups_encoding_t map_encoding = CUPS_AUTO_ENCODING;
+ /* Which charset is cached */
+#endif /* HAVE_ICONV_H */
/*
void
_cupsCharmapFlush(void)
{
- _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
- *cnext; /* Next Legacy SBCS Charset Map */
- _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
- *vnext; /* Next Legacy VBCS Charset Map */
-
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-
- /*
- * Loop through SBCS charset map cache, free all memory...
- */
-
- for (cmap = cmap_cache; cmap; cmap = cnext)
+#ifdef HAVE_ICONV_H
+ if (map_from_utf8 != (iconv_t)-1)
{
- cnext = cmap->next;
-
- free_sbcs_charmap(cmap);
+ iconv_close(map_from_utf8);
+ map_from_utf8 = (iconv_t)-1;
}
- cmap_cache = NULL;
-
- /*
- * Loop through DBCS/VBCS charset map cache, free all memory...
- */
-
- for (vmap = vmap_cache; vmap; vmap = vnext)
+ if (map_to_utf8 != (iconv_t)-1)
{
- vnext = vmap->next;
-
- free_vbcs_charmap(vmap);
-
- free(vmap);
+ iconv_close(map_to_utf8);
+ map_to_utf8 = (iconv_t)-1;
}
- vmap_cache = NULL;
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ map_encoding = CUPS_AUTO_ENCODING;
+#endif /* HAVE_ICONV_H */
}
/*
- * '_cupsCharmapFree()' - Free a character set map.
- *
- * This does not actually free; use '_cupsCharmapFlush()' for that.
+ * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
*/
-void
-_cupsCharmapFree(
+int /* O - Count or -1 on error */
+cupsCharsetToUTF8(
+ cups_utf8_t *dest, /* O - Target string */
+ const char *src, /* I - Source string */
+ const int maxout, /* I - Max output */
const cups_encoding_t encoding) /* I - Encoding */
{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
+ cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
+#ifdef HAVE_ICONV_H
+ size_t srclen, /* Length of source string */
+ outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
- * See if we already have this SBCS charset map loaded...
+ * Check for valid arguments...
*/
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)",
+ dest, src, maxout, encoding));
- for (cmap = cmap_cache; cmap; cmap = cmap->next)
+ if (!dest || !src || maxout < 1)
{
- if (cmap->encoding == encoding)
- {
- if (cmap->used > 0)
- cmap->used --;
- break;
- }
- }
-
- /*
- * See if we already have this DBCS/VBCS charset map loaded...
- */
+ if (dest)
+ *dest = '\0';
- for (vmap = vmap_cache; vmap; vmap = vmap->next)
- {
- if (vmap->encoding == encoding)
- {
- if (vmap->used > 0)
- vmap->used --;
- break;
- }
+ DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
+ return (-1);
}
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-}
-
-
-/*
- * '_cupsCharmapGet()' - Get a character set map.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
- */
-
-void * /* O - Charset map pointer */
-_cupsCharmapGet(
- const cups_encoding_t encoding) /* I - Encoding */
-{
- void *charmap; /* Charset map pointer */
-
-
- DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
-
/*
- * Check for valid arguments...
+ * Handle identity conversions...
*/
- if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
+ encoding >= CUPS_ENCODING_VBCS_END)
{
- DEBUG_puts(" Bad encoding, returning NULL!");
- return (NULL);
+ strlcpy((char *)dest, src, maxout);
+ return ((int)strlen((char *)dest));
}
/*
- * Lookup or get the charset map pointer and return...
+ * Handle ISO-8859-1 to UTF-8 directly...
*/
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ destptr = dest;
- charmap = get_charmap(encoding);
+ if (encoding == CUPS_ISO8859_1)
+ {
+ int ch; /* Character from string */
+ cups_utf8_t *destend; /* End of UTF-8 buffer */
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
- return (charmap);
-}
+ destend = dest + maxout - 2;
+ while (*src && destptr < destend)
+ {
+ ch = *src++ & 255;
-/*
- * 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
- */
+ if (ch & 128)
+ {
+ *destptr++ = 0xc0 | (ch >> 6);
+ *destptr++ = 0x80 | (ch & 0x3f);
+ }
+ else
+ *destptr++ = ch;
+ }
-int /* O - Count or -1 on error */
-cupsCharsetToUTF8(
- cups_utf8_t *dest, /* O - Target string */
- const char *src, /* I - Source string */
- const int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- int bytes; /* Number of bytes converted */
+ *destptr = '\0';
+ return ((int)(destptr - dest));
+ }
/*
- * Check for valid arguments...
+ * Convert input legacy charset to UTF-8...
*/
- DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
- dest, src, maxout, encoding));
+#ifdef HAVE_ICONV_H
+ _cupsMutexLock(&map_mutex);
- if (dest)
- *dest = '\0';
-
- if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ if (map_encoding != encoding)
{
- DEBUG_puts(" Bad arguments, returning -1");
- return (-1);
- }
+ _cupsCharmapFlush();
- /*
- * Handle identity conversions...
- */
+ map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
+ map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
+ map_encoding = encoding;
+ }
- if (encoding == CUPS_UTF8 ||
- encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ if (map_to_utf8 != (iconv_t)-1)
{
- strlcpy((char *)dest, src, maxout);
- return (strlen((char *)dest));
- }
+ srclen = strlen(src);
+ outBytesLeft = maxout - 1;
- /*
- * Convert input legacy charset to UTF-8...
- */
+ iconv(map_to_utf8, (char **)&src, &srclen, (char **)&destptr,
+ &outBytesLeft);
+ *destptr = '\0';
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ _cupsMutexUnlock(&map_mutex);
- if (encoding < CUPS_ENCODING_SBCS_END)
- bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
- else if (encoding < CUPS_ENCODING_VBCS_END)
- bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
- else
- {
- DEBUG_puts(" Bad encoding, returning -1");
- bytes = -1;
+ return ((int)(destptr - dest));
}
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ _cupsMutexUnlock(&map_mutex);
+#endif /* HAVE_ICONV_H */
- return (bytes);
+ /*
+ * No iconv() support, so error out...
+ */
+
+ *destptr = '\0';
+
+ return (-1);
}
/*
* 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
*/
int /* O - Count or -1 on error */
const int maxout, /* I - Max output */
const cups_encoding_t encoding) /* I - Encoding */
{
- int bytes; /* Number of bytes converted */
+ char *destptr; /* Pointer into destination */
+#ifdef HAVE_ICONV_H
+ size_t srclen, /* Length of source string */
+ outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
* Check for valid arguments...
*/
- if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ if (!dest || !src || maxout < 1)
{
if (dest)
*dest = '\0';
* Handle identity conversions...
*/
- if (encoding == CUPS_UTF8 ||
- encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
+ encoding >= CUPS_ENCODING_VBCS_END)
{
strlcpy(dest, (char *)src, maxout);
- return (strlen(dest));
+ return ((int)strlen(dest));
+ }
+
+ /*
+ * Handle UTF-8 to ISO-8859-1 directly...
+ */
+
+ destptr = dest;
+
+ if (encoding == CUPS_ISO8859_1)
+ {
+ int ch; /* Character from string */
+ char *destend; /* End of ISO-8859-1 buffer */
+
+
+ destend = dest + maxout - 1;
+
+ while (*src && destptr < destend)
+ {
+ ch = *src++;
+
+ if ((ch & 0xe0) == 0xc0)
+ {
+ ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
+
+ if (ch < 256)
+ *destptr++ = ch;
+ else
+ *destptr++ = '?';
+ }
+ else if ((ch & 0xf0) == 0xe0 ||
+ (ch & 0xf8) == 0xf0)
+ *destptr++ = '?';
+ else if (!(ch & 0x80))
+ *destptr++ = ch;
+ }
+
+ *destptr = '\0';
+
+ return ((int)(destptr - dest));
}
+#ifdef HAVE_ICONV_H
/*
* Convert input UTF-8 to legacy charset...
*/
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ _cupsMutexLock(&map_mutex);
- if (encoding < CUPS_ENCODING_SBCS_END)
- bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
- else if (encoding < CUPS_ENCODING_VBCS_END)
- bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
- else
- bytes = -1;
+ if (map_encoding != encoding)
+ {
+ _cupsCharmapFlush();
+
+ map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
+ map_to_utf8 = iconv_open("UTF-8", _cupsEncodingName(encoding));
+ map_encoding = encoding;
+ }
+
+ if (map_from_utf8 != (iconv_t)-1)
+ {
+ srclen = strlen((char *)src);
+ outBytesLeft = maxout - 1;
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ iconv(map_from_utf8, (char **)&src, &srclen, &destptr, &outBytesLeft);
+ *destptr = '\0';
- return (bytes);
+ _cupsMutexUnlock(&map_mutex);
+
+ return ((int)(destptr - dest));
+ }
+
+ _cupsMutexUnlock(&map_mutex);
+#endif /* HAVE_ICONV_H */
+
+ /*
+ * No iconv() support, so error out...
+ */
+
+ *destptr = '\0';
+
+ return (-1);
}
const cups_utf8_t *src, /* I - Source string */
const int maxout) /* I - Max output */
{
- size_t srclen; /* Source string length */
int i; /* Looping variable */
cups_utf8_t ch; /* Character value */
cups_utf8_t next; /* Next character value */
* Check for valid arguments and clear output...
*/
+ DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", dest,
+ src, maxout));
+
if (dest)
*dest = 0;
if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
+
return (-1);
+ }
/*
- * Convert input UTF-8 to output UTF-32 (and insert BOM)...
+ * Convert input UTF-8 to output UTF-32...
*/
- *dest++ = 0xfeff;
- srclen = strlen((char *)src);
-
for (i = maxout - 1; *src && i > 0; i --)
{
ch = *src++;
*/
*dest++ = ch;
+
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
+ continue;
}
else if ((ch & 0xe0) == 0xc0)
{
*/
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
*/
if (ch32 < 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
*dest++ = ch32;
+
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
+ src[-2], src[-1], (unsigned)ch32));
}
else if ((ch & 0xf0) == 0xe0)
{
*/
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = (ch32 << 6) | (next & 0x3f);
*/
if (ch32 < 0x800)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
*dest++ = ch32;
+
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
+ src[-3], src[-2], src[-1], (unsigned)ch32));
}
else if ((ch & 0xf8) == 0xf0)
{
*/
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = (ch32 << 6) | (next & 0x3f);
next = *src++;
- if (!next)
+ if ((next & 0xc0) != 0x80)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
ch32 = (ch32 << 6) | (next & 0x3f);
*/
if (ch32 < 0x10000)
+ {
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
+ }
*dest++ = ch32;
+
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
+ src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
}
else
{
* More than 4-octet (invalid UTF-8 sequence)...
*/
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+
return (-1);
}
* Check for UTF-16 surrogate (illegal UTF-8)...
*/
- if (*dest >= 0xd800 && *dest <= 0xdfff)
+ if (ch32 >= 0xd800 && ch32 <= 0xdfff)
return (-1);
}
*dest = 0;
- return (i);
+ DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
+
+ return (maxout - 1 - i);
}
* Check for valid arguments and clear output...
*/
+ DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", dest, src,
+ maxout));
+
if (dest)
*dest = '\0';
if (!dest || !src || maxout < 1)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
+
return (-1);
+ }
/*
* Check for leading BOM in UTF-32 and inverted BOM...
start = dest;
swap = *src == 0xfffe0000;
+ DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
+
if (*src == 0xfffe0000 || *src == 0xfeff)
src ++;
*/
if (ch > 0x10ffff)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
+
return (-1);
+ }
/*
* Convert UTF-32 character to UTF-8 character(s)...
*dest++ = (cups_utf8_t)ch;
i --;
+
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
}
else if (ch < 0x800)
{
*/
if (i < 2)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
+
return (-1);
+ }
*dest++ = (cups_utf8_t)(0xc0 | ((ch >> 6) & 0x1f));
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 2;
+
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
+ dest[-2], dest[-1]));
}
else if (ch < 0x10000)
{
*/
if (i < 3)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
+
return (-1);
+ }
*dest++ = (cups_utf8_t)(0xe0 | ((ch >> 12) & 0x0f));
*dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 3;
+
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
+ dest[-3], dest[-2], dest[-1]));
}
else
{
*/
if (i < 4)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
+
return (-1);
+ }
*dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
*dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
*dest++ = (cups_utf8_t)(0x80 | ((ch >> 6) & 0x3f));
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 4;
+
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
+ (unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
}
}
*dest = '\0';
+ DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
+
return ((int)(dest - start));
}
* Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
*/
- for (workptr = work + 1, start = dest; *workptr && maxout > 1; maxout --)
+ for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
{
unichar = *workptr++;
if (!unichar)
*workptr; /* Pointer into string */
+ DEBUG_printf(("7conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
+ "encoding=%d)", dest, src, maxout, encoding));
+
/*
* Find legacy charset map in cache...
*/
if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
+ {
+ DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (no charmap)");
+
return (-1);
+ }
/*
* Convert input UTF-8 to internal UCS-4 (and insert BOM)...
*/
if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
+ {
+ DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
+
return (-1);
+ }
/*
* Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
*/
- for (start = dest, workptr = work + 1; *workptr && maxout > 1; maxout --)
+ for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
{
unichar = *workptr++;
- if (!unichar)
- break;
/*
* Convert ASCII verbatim (optimization)...
if (unichar < 0x80)
{
- *dest++ = (cups_vbcs_t)unichar;
+ *dest++ = (cups_sbcs_t)unichar;
+
+ DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X", (unsigned)unichar,
+ dest[-1]));
+
continue;
}
if (legchar > 0xffffff)
{
if (maxout < 5)
+ {
+ DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
+
return (-1);
+ }
*dest++ = (cups_sbcs_t)(legchar >> 24);
*dest++ = (cups_sbcs_t)(legchar >> 16);
*dest++ = (cups_sbcs_t)legchar;
maxout -= 3;
+
+ DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X",
+ (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
}
else if (legchar > 0xffff)
{
if (maxout < 4)
+ {
+ DEBUG_puts("8conv_utf8_to_vbcs: Returning -1 (out of space)");
+
return (-1);
+ }
*dest++ = (cups_sbcs_t)(legchar >> 16);
*dest++ = (cups_sbcs_t)(legchar >> 8);
*dest++ = (cups_sbcs_t)legchar;
maxout -= 2;
+
+ DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X %02X",
+ (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
}
else if (legchar > 0xff)
{
*dest++ = (cups_sbcs_t)legchar;
maxout --;
+
+ DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X %02X",
+ (unsigned)unichar, dest[-2], dest[-1]));
+ }
+ else
+ {
+ *dest++ = (cups_sbcs_t)legchar;
+
+ DEBUG_printf(("9conv_utf8_to_vbcs: %08x => %02X",
+ (unsigned)unichar, dest[-1]));
}
}
vmap->used --;
+ DEBUG_printf(("8conv_utf8_to_vbcs: Returning %d characters",
+ (int)(dest - start)));
+
return ((int)(dest - start));
}
* Find legacy charset map in cache...
*/
+ DEBUG_printf(("7conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)",
+ dest, src, maxout, encoding));
+
if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
+ {
+ DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
+
return (-1);
+ }
/*
* Convert input legacy charset to internal UCS-4 (and insert BOM)...
if (legchar < 0x80)
{
*workptr++ = (cups_utf32_t)legchar;
+
+ DEBUG_printf(("9conv_vbcs_to_utf8: %02X => %08X", src[-1],
+ (unsigned)legchar));
continue;
}
if (vmap->lead2char[(int)leadchar] == leadchar)
{
if (!*src)
+ {
+ DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string)");
+
return (-1);
+ }
legchar = (legchar << 8) | *src++;
*workptr++ = 0xfffd;
else
*workptr++ = (cups_utf32_t)*crow;
+
+ DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X => %08X",
+ src[-2], src[-1], (unsigned)workptr[-1]));
continue;
}
if (vmap->lead3char[(int)leadchar] == leadchar)
{
if (!*src || !src[1])
+ {
+ DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 2)");
+
return (-1);
+ }
legchar = (legchar << 8) | *src++;
legchar = (legchar << 8) | *src++;
else if (vmap->lead4char[(int)leadchar] == leadchar)
{
if (!*src || !src[1] || !src[2])
+ {
+ DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (short string 3)");
+
return (-1);
+ }
legchar = (legchar << 8) | *src++;
legchar = (legchar << 8) | *src++;
legchar = (legchar << 8) | *src++;
}
else
+ {
+ DEBUG_puts("8conv_vbcs_to_utf8: Returning -1 (bad character)");
+
return (-1);
+ }
/*
* Find 3-byte or 4-byte legacy character...
*workptr++ = 0xfffd;
else
*workptr++ = wide2uni->unichar;
+
+ if (vmap->lead3char[(int)leadchar] == leadchar)
+ DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X => %08X",
+ src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
+ else
+ DEBUG_printf(("9conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X",
+ src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
}
*workptr = 0;
vmap->used --;
+ DEBUG_printf(("9conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8",
+ (int)(workptr - work)));
+
/*
* Convert internal UCS-4 to output UTF-8 (and delete BOM)...
*/
*/
-void * /* O - Charset map pointer */
+static void * /* O - Charset map pointer */
get_charmap(
const cups_encoding_t encoding) /* I - Encoding */
{
_cups_globals_t *cg = _cupsGlobals(); /* Global data */
+ DEBUG_printf(("7get_charmap(encoding=%d)", encoding));
+
/*
* Get the data directory and charset map name...
*/
snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
cg->cups_datadir, _cupsEncodingName(encoding));
- DEBUG_printf((" filename=\"%s\"\n", filename));
+ DEBUG_printf(("9get_charmap: filename=\"%s\"", filename));
/*
* Read charset map input file into cache...
* See if we already have this SBCS charset map loaded...
*/
+ DEBUG_printf(("7get_sbcs_charmap(encoding=%d, filename=\"%s\")", encoding,
+ filename));
+
for (cmap = cmap_cache; cmap; cmap = cmap->next)
{
if (cmap->encoding == encoding)
{
cmap->used ++;
- DEBUG_printf((" returning existing cmap=%p\n", cmap));
+ DEBUG_printf(("8get_sbcs_charmap: Returning existing cmap=%p", cmap));
return ((void *)cmap);
}
*/
if ((fp = cupsFileOpen(filename, "r")) == NULL)
+ {
+ DEBUG_printf(("8get_sbcs_charmap: Returning NULL (%s)", strerror(errno)));
+
return (NULL);
+ }
/*
* Allocate memory for SBCS charset map...
if ((cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t))) == NULL)
{
cupsFileClose(fp);
- DEBUG_puts(" Unable to allocate memory!");
+ DEBUG_puts("8get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
return (NULL);
}
goto sbcs_error;
unichar = strtol(s, NULL, 16);
- if (unichar < 0 || unichar > 0xffff)
+ if (unichar < 0 || unichar > 0x10ffff)
goto sbcs_error;
/*
cmap->next = cmap_cache;
cmap_cache = cmap;
- DEBUG_printf((" returning new cmap=%p\n", cmap));
+ DEBUG_printf(("8get_sbcs_charmap: Returning new cmap=%p", cmap));
return (cmap);
cupsFileClose(fp);
- DEBUG_puts(" Error, returning NULL!");
+ DEBUG_puts("8get_sbcs_charmap: Returning NULL (Read/format error)");
return (NULL);
}
char *s; /* Line parsing pointer */
char line[256]; /* Line from charset map file */
int i; /* Loop variable */
- int wide; /* 32-bit legacy char */
+ int legacy; /* 32-bit legacy char */
- DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
+ DEBUG_printf(("7get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
encoding, filename));
/*
if (vmap->encoding == encoding)
{
vmap->used ++;
- DEBUG_printf((" returning existing vmap=%p\n", vmap));
+ DEBUG_printf(("8get_vbcs_charmap: Returning existing vmap=%p", vmap));
return ((void *)vmap);
}
if ((fp = cupsFileOpen(filename, "r")) == NULL)
{
- DEBUG_printf((" Unable to open file: %s\n", strerror(errno)));
+ DEBUG_printf(("8get_vbcs_charmap: Returning NULL (%s)", strerror(errno)));
return (NULL);
}
if ((mapcount = get_charmap_count(fp)) <= 0)
{
- DEBUG_puts(" Unable to get charmap count!");
+ DEBUG_puts("8get_vbcs_charmap: Unable to get charmap count!");
+
+ cupsFileClose(fp);
return (NULL);
}
- DEBUG_printf((" mapcount=%d\n", mapcount));
+ DEBUG_printf(("8get_vbcs_charmap: mapcount=%d", mapcount));
/*
* Allocate memory for DBCS/VBCS charset map...
if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
{
+ DEBUG_puts("8get_vbcs_charmap: Unable to allocate memory!");
+
cupsFileClose(fp);
- DEBUG_puts(" Unable to allocate memory!");
return (NULL);
}
* Save DBCS/VBCS charset map into memory for transcoding...
*/
- leadchar = 0;
wide2uni = NULL;
cupsFileRewind(fp);
- i = 0;
- wide = 0;
+ i = 0;
+ legacy = 0;
while (cupsFileGets(fp, line, sizeof(line)))
{
goto vbcs_error;
unichar = strtol(s, NULL, 16);
- if (unichar < 0 || unichar > 0xffff)
+ if (unichar < 0 || unichar > 0x10ffff)
goto vbcs_error;
i ++;
-/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
- legchar, (unsigned)unichar)); */
+ DEBUG_printf(("9get_vbcs_charmap: i=%d, legchar=0x%08lx, unichar=0x%04x", i,
+ legchar, (unsigned)unichar));
/*
* Save lead char of 2/3/4-byte legacy char...
*/
- if (legchar > 0xff && legchar <= 0xffff)
+ if (legchar > 0xffffff)
{
- leadchar = (cups_sbcs_t)(legchar >> 8);
- vmap->lead2char[leadchar] = leadchar;
+ leadchar = (cups_sbcs_t)(legchar >> 24);
+ vmap->lead4char[leadchar] = leadchar;
}
-
- if (legchar > 0xffff && legchar <= 0xffffff)
+ else if (legchar > 0xffff)
{
leadchar = (cups_sbcs_t)(legchar >> 16);
vmap->lead3char[leadchar] = leadchar;
}
-
- if (legchar > 0xffffff)
+ else
{
- leadchar = (cups_sbcs_t)(legchar >> 24);
- vmap->lead4char[leadchar] = leadchar;
+ leadchar = (cups_sbcs_t)(legchar >> 8);
+ vmap->lead2char[leadchar] = leadchar;
}
/*
* Save VBCS 32-bit to Unicode mapping in sorted list table...
*/
- if (!wide)
+ if (!legacy)
{
- wide = 1;
+ legacy = 1;
vmap->widecount = (mapcount - i + 1);
wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
sizeof(_cups_wide2uni_t));
* Add it to the cache and return...
*/
- vmap->next = vmap_cache;
+ vmap->next = vmap_cache;
vmap_cache = vmap;
- DEBUG_printf((" returning new vmap=%p\n", vmap));
+ DEBUG_printf(("8get_vbcs_charmap: Returning new vmap=%p", vmap));
return (vmap);
cupsFileClose(fp);
- DEBUG_puts(" Error, returning NULL!");
+ DEBUG_puts("8get_vbcs_charmap: Returning NULL (Read/format error)");
return (NULL);
}
/*
- * End of "$Id: transcode.c 5373 2006-04-06 20:03:32Z mike $"
+ * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
*/