/*
- * "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
+ * Transcoding support for CUPS.
*
- * Transcoding support for the Common UNIX Printing System (CUPS).
+ * Copyright 2007-2014 by Apple Inc.
+ * Copyright 1997-2007 by Easy Software Products.
*
- * Copyright 2007-2008 by Apple Inc.
- * Copyright 1997-2007 by Easy Software Products.
- *
- * These coded instructions, statements, and computer programs are the
- * property of Apple Inc. and are protected by Federal copyright
- * law. Distribution and use rights are outlined in the file "LICENSE.txt"
- * which should have been included with this file. If this file is
- * file is missing or damaged, see the license at "http://www.cups.org/".
- *
- * This file is subject to the Apple OS-Developed Software exception.
- *
- * Contents:
- *
- * _cupsCharmapFlush() - Flush all character set maps out of cache.
- * _cupsCharmapFree() - Free a character set map.
- * _cupsCharmapGet() - Get a character set map.
- * cupsCharsetToUTF8() - Convert legacy character set to UTF-8.
- * cupsUTF8ToCharset() - Convert UTF-8 to legacy character set.
- * cupsUTF8ToUTF32() - Convert UTF-8 to UTF-32.
- * cupsUTF32ToUTF8() - Convert UTF-32 to UTF-8.
- * compare_wide() - Compare key for wide (VBCS) match.
- * conv_sbcs_to_utf8() - Convert legacy SBCS to UTF-8.
- * conv_utf8_to_sbcs() - Convert UTF-8 to legacy SBCS.
- * conv_utf8_to_vbcs() - Convert UTF-8 to legacy DBCS/VBCS.
- * conv_vbcs_to_utf8() - Convert legacy DBCS/VBCS to UTF-8.
- * free_sbcs_charmap() - Free memory used by a single byte character set.
- * free_vbcs_charmap() - Free memory used by a variable byte character set.
- * get_charmap() - Lookup or get a character set map (private).
- * get_charmap_count() - Count lines in a charmap file.
- * get_sbcs_charmap() - Get SBCS Charmap.
- * get_vbcs_charmap() - Get DBCS/VBCS Charmap.
+ * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
*/
/*
* Include necessary headers...
*/
-#include "globals.h"
-#include "debug.h"
+#include "cups-private.h"
+#include "debug-internal.h"
#include <limits.h>
-#include <stdlib.h>
-#include <errno.h>
#include <time.h>
+#ifdef HAVE_ICONV_H
+# include <iconv.h>
+#endif /* HAVE_ICONV_H */
/*
* Local globals...
*/
-#ifdef HAVE_PTHREAD_H
-static pthread_mutex_t map_mutex = PTHREAD_MUTEX_INITIALIZER;
+#ifdef HAVE_ICONV_H
+static _cups_mutex_t map_mutex = _CUPS_MUTEX_INITIALIZER;
/* Mutex to control access to maps */
-#endif /* HAVE_PTHREAD_H */
-static _cups_cmap_t *cmap_cache = NULL;
- /* SBCS Charmap Cache */
-static _cups_vmap_t *vmap_cache = NULL;
- /* VBCS Charmap Cache */
-
-
-/*
- * Local functions...
- */
-
-static int compare_wide(const void *k1, const void *k2);
-static int conv_sbcs_to_utf8(cups_utf8_t *dest,
- const cups_sbcs_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_utf8_to_sbcs(cups_sbcs_t *dest,
- const cups_utf8_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_utf8_to_vbcs(cups_sbcs_t *dest,
- const cups_utf8_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static int conv_vbcs_to_utf8(cups_utf8_t *dest,
- const cups_sbcs_t *src,
- int maxout,
- const cups_encoding_t encoding);
-static void free_sbcs_charmap(_cups_cmap_t *sbcs);
-static void free_vbcs_charmap(_cups_vmap_t *vbcs);
-static void *get_charmap(const cups_encoding_t encoding);
-static int get_charmap_count(cups_file_t *fp);
-static _cups_cmap_t *get_sbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
-static _cups_vmap_t *get_vbcs_charmap(const cups_encoding_t encoding,
- const char *filename);
+static iconv_t map_from_utf8 = (iconv_t)-1;
+ /* Convert from UTF-8 to charset */
+static iconv_t map_to_utf8 = (iconv_t)-1;
+ /* Convert from charset to UTF-8 */
+static cups_encoding_t map_encoding = CUPS_AUTO_ENCODING;
+ /* Which charset is cached */
+#endif /* HAVE_ICONV_H */
/*
void
_cupsCharmapFlush(void)
{ /* NOTE(review): no locking in the new body; the two callers visible in this file hold map_mutex when they call it */
- _cups_cmap_t *cmap, /* Legacy SBCS / Unicode Charset Map */
- *cnext; /* Next Legacy SBCS Charset Map */
- _cups_vmap_t *vmap, /* Legacy VBCS / Unicode Charset Map */
- *vnext; /* Next Legacy VBCS Charset Map */
-
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-
- /*
- * Loop through SBCS charset map cache, free all memory...
- */
-
- for (cmap = cmap_cache; cmap; cmap = cnext)
- {
- cnext = cmap->next;
-
- free_sbcs_charmap(cmap);
- }
-
- cmap_cache = NULL;
-
- /*
- * Loop through DBCS/VBCS charset map cache, free all memory...
- */
-
- for (vmap = vmap_cache; vmap; vmap = vnext)
- {
- vnext = vmap->next;
-
- free_vbcs_charmap(vmap);
- }
-
- vmap_cache = NULL;
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-}
-
-
-/*
- * '_cupsCharmapFree()' - Free a character set map.
- *
- * This does not actually free; use '_cupsCharmapFlush()' for that.
- */
-
-void
-_cupsCharmapFree(
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
-
-
- /*
- * See if we already have this SBCS charset map loaded...
- */
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-
- for (cmap = cmap_cache; cmap; cmap = cmap->next)
- {
- if (cmap->encoding == encoding)
- {
- if (cmap->used > 0)
- cmap->used --;
- break;
- }
- }
-
- /*
- * See if we already have this DBCS/VBCS charset map loaded...
- */
-
- for (vmap = vmap_cache; vmap; vmap = vmap->next)
+#ifdef HAVE_ICONV_H
+ if (map_from_utf8 != (iconv_t)-1) /* UTF-8 -> charset converter is currently open */
{
- if (vmap->encoding == encoding)
- {
- if (vmap->used > 0)
- vmap->used --;
- break;
- }
+ iconv_close(map_from_utf8);
+ map_from_utf8 = (iconv_t)-1; /* Reset to the "not open" value it is initialized with */
}
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-}
-
-
-/*
- * '_cupsCharmapGet()' - Get a character set map.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
- */
-
-void * /* O - Charset map pointer */
-_cupsCharmapGet(
- const cups_encoding_t encoding) /* I - Encoding */
-{
- void *charmap; /* Charset map pointer */
-
-
- DEBUG_printf(("_cupsCharmapGet(encoding=%d)\n", encoding));
-
- /*
- * Check for valid arguments...
- */
-
- if (encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ if (map_to_utf8 != (iconv_t)-1) /* Charset -> UTF-8 converter is currently open */
{
- DEBUG_puts(" Bad encoding, returning NULL!");
- return (NULL);
+ iconv_close(map_to_utf8);
+ map_to_utf8 = (iconv_t)-1; /* Reset to the "not open" value it is initialized with */
}
- /*
- * Lookup or get the charset map pointer and return...
- */
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-
- charmap = get_charmap(encoding);
-
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
-
- return (charmap);
+ map_encoding = CUPS_AUTO_ENCODING; /* Back to the initial value: no charset is cached */
+#endif /* HAVE_ICONV_H */
}
/*
* 'cupsCharsetToUTF8()' - Convert legacy character set to UTF-8.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
*/
int /* O - Count or -1 on error */
cupsCharsetToUTF8(
- cups_utf8_t *dest, /* O - Target string */
- const char *src, /* I - Source string */
- const int maxout, /* I - Max output */
+ cups_utf8_t *dest, /* O - Target string */
+ const char *src, /* I - Source string */
+ const int maxout, /* I - Max output */
const cups_encoding_t encoding) /* I - Encoding */
{
- int bytes; /* Number of bytes converted */
+ cups_utf8_t *destptr; /* Pointer into UTF-8 buffer */
+#ifdef HAVE_ICONV_H
+ size_t srclen, /* Length of source string */
+ outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
* Check for valid arguments...
*/
- DEBUG_printf(("cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)\n",
- dest, src, maxout, encoding));
-
- if (dest)
- *dest = '\0';
+ DEBUG_printf(("2cupsCharsetToUTF8(dest=%p, src=\"%s\", maxout=%d, encoding=%d)", (void *)dest, src, maxout, encoding));
- if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ if (!dest || !src || maxout < 1)
{
- DEBUG_puts(" Bad arguments, returning -1");
+ if (dest)
+ *dest = '\0';
+
+ DEBUG_puts("3cupsCharsetToUTF8: Bad arguments, returning -1");
return (-1);
}
* Handle identity conversions...
*/
- if (encoding == CUPS_UTF8 ||
- encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ if (encoding == CUPS_UTF8 || encoding <= CUPS_US_ASCII ||
+ encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy((char *)dest, src, maxout);
+ strlcpy((char *)dest, src, (size_t)maxout);
return ((int)strlen((char *)dest));
}
* Handle ISO-8859-1 to UTF-8 directly...
*/
+ destptr = dest;
+
if (encoding == CUPS_ISO8859_1)
{
int ch; /* Character from string */
- cups_utf8_t *destptr, /* Pointer into UTF-8 buffer */
- *destend; /* End of UTF-8 buffer */
+ cups_utf8_t *destend; /* End of UTF-8 buffer */
- destptr = dest;
destend = dest + maxout - 2;
while (*src && destptr < destend)
if (ch & 128)
{
- *destptr++ = 0xc0 | (ch >> 6);
- *destptr++ = 0x80 | (ch & 0x3f);
+ *destptr++ = (cups_utf8_t)(0xc0 | (ch >> 6));
+ *destptr++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
}
else
- *destptr++ = ch;
+ *destptr++ = (cups_utf8_t)ch;
}
*destptr = '\0';
* Convert input legacy charset to UTF-8...
*/
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+#ifdef HAVE_ICONV_H
+ _cupsMutexLock(&map_mutex);
+
+ if (map_encoding != encoding)
+ {
+ char toset[1024]; /* Destination character set */
+
+ _cupsCharmapFlush();
+
+ snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
+
+ map_encoding = encoding;
+ map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
+ map_to_utf8 = iconv_open("UTF-8", toset);
+ }
+
+ if (map_to_utf8 != (iconv_t)-1)
+ {
+ char *altdestptr = (char *)dest; /* Silence bogus GCC type-punned */
+
+ srclen = strlen(src);
+ outBytesLeft = (size_t)maxout - 1;
+
+ iconv(map_to_utf8, (char **)&src, &srclen, &altdestptr, &outBytesLeft);
+ *altdestptr = '\0';
+
+ _cupsMutexUnlock(&map_mutex);
+
+ return ((int)(altdestptr - (char *)dest));
+ }
+
+ _cupsMutexUnlock(&map_mutex);
+#endif /* HAVE_ICONV_H */
- if (encoding < CUPS_ENCODING_SBCS_END)
- bytes = conv_sbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
- else
- bytes = conv_vbcs_to_utf8(dest, (cups_sbcs_t *)src, maxout, encoding);
+ /*
+ * No iconv() support, so error out...
+ */
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ *destptr = '\0';
- return (bytes);
+ return (-1);
}
/*
* 'cupsUTF8ToCharset()' - Convert UTF-8 to legacy character set.
- *
- * This code handles single-byte (SBCS), double-byte (DBCS), and
- * variable-byte (VBCS) character sets _without_ charset escapes...
- * This code does not handle multiple-byte character sets (MBCS)
- * (such as ISO-2022-JP) with charset switching via escapes...
*/
int /* O - Count or -1 on error */
const int maxout, /* I - Max output */
const cups_encoding_t encoding) /* I - Encoding */
{
- int bytes; /* Number of bytes converted */
+ char *destptr; /* Pointer into destination */
+#ifdef HAVE_ICONV_H
+ size_t srclen, /* Length of source string */
+ outBytesLeft; /* Bytes remaining in output buffer */
+#endif /* HAVE_ICONV_H */
/*
* Check for valid arguments...
*/
- if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
+ if (!dest || !src || maxout < 1)
{
if (dest)
*dest = '\0';
*/
if (encoding == CUPS_UTF8 ||
- encoding < 0 || encoding >= CUPS_ENCODING_VBCS_END)
+ encoding >= CUPS_ENCODING_VBCS_END)
{
- strlcpy(dest, (char *)src, maxout);
+ strlcpy(dest, (char *)src, (size_t)maxout);
return ((int)strlen(dest));
}
* Handle UTF-8 to ISO-8859-1 directly...
*/
- if (encoding == CUPS_ISO8859_1)
- {
- int ch; /* Character from string */
- char *destptr, /* Pointer into ISO-8859-1 buffer */
- *destend; /* End of ISO-8859-1 buffer */
+ destptr = dest;
+ if (encoding == CUPS_ISO8859_1 || encoding <= CUPS_US_ASCII)
+ {
+ int ch, /* Character from string */
+ maxch; /* Maximum character for charset */
+ char *destend; /* End of ISO-8859-1 buffer */
- destptr = dest;
+ maxch = encoding == CUPS_ISO8859_1 ? 256 : 128;
destend = dest + maxout - 1;
while (*src && destptr < destend)
{
ch = ((ch & 0x1f) << 6) | (*src++ & 0x3f);
- if (ch < 256)
- *destptr++ = ch;
+ if (ch < maxch)
+ *destptr++ = (char)ch;
else
*destptr++ = '?';
}
(ch & 0xf8) == 0xf0)
*destptr++ = '?';
else if (!(ch & 0x80))
- *destptr++ = ch;
+ *destptr++ = (char)ch;
}
*destptr = '\0';
return ((int)(destptr - dest));
}
+#ifdef HAVE_ICONV_H
/*
* Convert input UTF-8 to legacy charset...
*/
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_lock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ _cupsMutexLock(&map_mutex);
+
+ if (map_encoding != encoding)
+ {
+ char toset[1024]; /* Destination character set */
+
+ _cupsCharmapFlush();
+
+ snprintf(toset, sizeof(toset), "%s//IGNORE", _cupsEncodingName(encoding));
+
+ map_encoding = encoding;
+ map_from_utf8 = iconv_open(_cupsEncodingName(encoding), "UTF-8");
+ map_to_utf8 = iconv_open("UTF-8", toset);
+ }
+
+ if (map_from_utf8 != (iconv_t)-1)
+ {
+ char *altsrc = (char *)src; /* Silence bogus GCC type-punned */
+
+ srclen = strlen((char *)src);
+ outBytesLeft = (size_t)maxout - 1;
- if (encoding < CUPS_ENCODING_SBCS_END)
- bytes = conv_utf8_to_sbcs((cups_sbcs_t *)dest, src, maxout, encoding);
- else
- bytes = conv_utf8_to_vbcs((cups_sbcs_t *)dest, src, maxout, encoding);
+ iconv(map_from_utf8, &altsrc, &srclen, &destptr, &outBytesLeft);
+ *destptr = '\0';
+
+ _cupsMutexUnlock(&map_mutex);
+
+ return ((int)(destptr - dest));
+ }
+
+ _cupsMutexUnlock(&map_mutex);
+#endif /* HAVE_ICONV_H */
+
+ /*
+ * No iconv() support, so error out...
+ */
-#ifdef HAVE_PTHREAD_H
- pthread_mutex_unlock(&map_mutex);
-#endif /* HAVE_PTHREAD_H */
+ *destptr = '\0';
- return (bytes);
+ return (-1);
}
* Check for valid arguments and clear output...
*/
- DEBUG_printf(("cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)\n", dest,
- src ? (const char *)src : "(null)", maxout));
+ DEBUG_printf(("2cupsUTF8ToUTF32(dest=%p, src=\"%s\", maxout=%d)", (void *)dest, src, maxout));
if (dest)
*dest = 0;
if (!dest || !src || maxout < 1 || maxout > CUPS_MAX_USTRING)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad arguments)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad arguments)");
return (-1);
}
*dest++ = ch;
- DEBUG_printf(("cupsUTF8ToUTF32: %02x => %08X\n", src[-1], ch));
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x => %08X", src[-1], ch));
continue;
}
else if ((ch & 0xe0) == 0xc0)
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = ((ch & 0x1f) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x1f) << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
if (ch32 < 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
*dest++ = ch32;
- DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x => %08X\n",
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x => %08X",
src[-2], src[-1], (unsigned)ch32));
}
else if ((ch & 0xf0) == 0xe0)
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = ((ch & 0x0f) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x0f) << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
if (ch32 < 0x800)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
*dest++ = ch32;
- DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x => %08X\n",
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x => %08X",
src[-3], src[-2], src[-1], (unsigned)ch32));
}
else if ((ch & 0xf8) == 0xf0)
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = ((ch & 0x07) << 6) | (next & 0x3f);
+ ch32 = (cups_utf32_t)((ch & 0x07) << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
next = *src++;
if ((next & 0xc0) != 0x80)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
- ch32 = (ch32 << 6) | (next & 0x3f);
+ ch32 = (ch32 << 6) | (cups_utf32_t)(next & 0x3f);
/*
* Check for non-shortest form (invalid UTF-8)...
if (ch32 < 0x10000)
{
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
*dest++ = ch32;
- DEBUG_printf(("cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X\n",
+ DEBUG_printf(("4cupsUTF8ToUTF32: %02x %02x %02x %02x => %08X",
src[-4], src[-3], src[-2], src[-1], (unsigned)ch32));
}
else
* More than 4-octet (invalid UTF-8 sequence)...
*/
- DEBUG_puts("cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
+ DEBUG_puts("3cupsUTF8ToUTF32: Returning -1 (bad UTF-8 sequence)");
return (-1);
}
*dest = 0;
- DEBUG_printf(("cupsUTF8ToUTF32: Returning %d characters\n", maxout - 1 - i));
+ DEBUG_printf(("3cupsUTF8ToUTF32: Returning %d characters", maxout - 1 - i));
return (maxout - 1 - i);
}
* Check for valid arguments and clear output...
*/
- DEBUG_printf(("cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)\n", dest, src,
- maxout));
+ DEBUG_printf(("2cupsUTF32ToUTF8(dest=%p, src=%p, maxout=%d)", (void *)dest, (void *)src, maxout));
if (dest)
*dest = '\0';
if (!dest || !src || maxout < 1)
{
- DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (bad args)");
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (bad args)");
return (-1);
}
start = dest;
swap = *src == 0xfffe0000;
- DEBUG_printf(("cupsUTF32ToUTF8: swap=%d\n", swap));
+ DEBUG_printf(("4cupsUTF32ToUTF8: swap=%d", swap));
if (*src == 0xfffe0000 || *src == 0xfeff)
src ++;
if (ch > 0x10ffff)
{
- DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (character out of range)");
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (character out of range)");
return (-1);
}
*dest++ = (cups_utf8_t)ch;
i --;
- DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x\n", (unsigned)ch, dest[-1]));
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x", (unsigned)ch, dest[-1]));
}
else if (ch < 0x800)
{
if (i < 2)
{
- DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 2)");
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 2)");
return (-1);
}
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 2;
- DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x\n", (unsigned)ch,
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x", (unsigned)ch,
dest[-2], dest[-1]));
}
else if (ch < 0x10000)
if (i < 3)
{
- DEBUG_puts("cupsUTF32ToUTF8: Returning -1 (too long 3)");
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 3)");
return (-1);
}
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 3;
- DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x\n", (unsigned)ch,
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x", (unsigned)ch,
dest[-3], dest[-2], dest[-1]));
}
else
*/
if (i < 4)
+ {
+ DEBUG_puts("3cupsUTF32ToUTF8: Returning -1 (too long 4)");
+
return (-1);
+ }
*dest++ = (cups_utf8_t)(0xf0 | ((ch >> 18) & 0x07));
*dest++ = (cups_utf8_t)(0x80 | ((ch >> 12) & 0x3f));
*dest++ = (cups_utf8_t)(0x80 | (ch & 0x3f));
i -= 4;
- DEBUG_printf(("cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x\n",
+ DEBUG_printf(("4cupsUTF32ToUTF8: %08x => %02x %02x %02x %02x",
(unsigned)ch, dest[-4], dest[-3], dest[-2], dest[-1]));
}
}
*dest = '\0';
- DEBUG_printf(("cupsUTF32ToUTF8: Returning %d\n", (int)(dest - start)));
-
- return ((int)(dest - start));
-}
-
-
-/*
- * 'compare_wide()' - Compare key for wide (VBCS) match.
- */
-
-static int
-compare_wide(const void *k1, /* I - Key char */
- const void *k2) /* I - Map char */
-{
- cups_vbcs_t key; /* Legacy key character */
- cups_vbcs_t map; /* Legacy map character */
-
-
- key = *((cups_vbcs_t *)k1);
- map = ((_cups_wide2uni_t *)k2)->widechar;
-
- return ((int)(key - map));
-}
-
-
-/*
- * 'conv_sbcs_to_utf8()' - Convert legacy SBCS to UTF-8.
- */
-
-static int /* O - Count or -1 on error */
-conv_sbcs_to_utf8(
- cups_utf8_t *dest, /* O - Target string */
- const cups_sbcs_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_sbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
- return (-1);
-
- /*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
- */
-
- work[0] = 0xfeff;
- for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
- {
- legchar = *src++;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (legchar < 0x80)
- *workptr++ = (cups_utf32_t)legchar;
- else
- {
- /*
- * Convert unknown character to Replacement Character...
- */
-
- crow = cmap->char2uni + legchar;
-
- if (!*crow)
- *workptr++ = 0xfffd;
- else
- *workptr++ = (cups_utf32_t)*crow;
- }
- }
-
- *workptr = 0;
-
- /*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
- */
-
- cmap->used --;
-
- return (cupsUTF32ToUTF8(dest, work, maxout));
-}
-
-
-/*
- * 'conv_utf8_to_sbcs()' - Convert UTF-8 to legacy SBCS.
- */
-
-static int /* O - Count or -1 on error */
-conv_utf8_to_sbcs(
- cups_sbcs_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- cups_sbcs_t *start; /* Start of destination string */
- _cups_cmap_t *cmap; /* Legacy SBCS / Unicode Charset Map */
- cups_sbcs_t *srow; /* Pointer to SBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((cmap = (_cups_cmap_t *)get_charmap(encoding)) == NULL)
- return (-1);
-
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- */
-
- if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
- return (-1);
-
- /*
- * Convert internal UCS-4 to SBCS legacy charset (and delete BOM)...
- */
-
- for (workptr = work, start = dest; *workptr && maxout > 0; maxout --)
- {
- unichar = *workptr++;
- if (!unichar)
- break;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (unichar < 0x80)
- {
- *dest++ = (cups_sbcs_t)unichar;
- continue;
- }
-
- /*
- * Convert unknown character to visible replacement...
- */
-
- srow = cmap->uni2char[(int)((unichar >> 8) & 0xff)];
-
- if (srow)
- srow += (int)(unichar & 0xff);
-
- if (!srow || !*srow)
- *dest++ = '?';
- else
- *dest++ = *srow;
- }
-
- *dest = '\0';
-
- cmap->used --;
+ DEBUG_printf(("3cupsUTF32ToUTF8: Returning %d", (int)(dest - start)));
return ((int)(dest - start));
}
-
-
-/*
- * 'conv_utf8_to_vbcs()' - Convert UTF-8 to legacy DBCS/VBCS.
- */
-
-static int /* O - Count or -1 on error */
-conv_utf8_to_vbcs(
- cups_sbcs_t *dest, /* O - Target string */
- const cups_utf8_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- cups_sbcs_t *start; /* Start of destination string */
- _cups_vmap_t *vmap; /* Legacy DBCS / Unicode Charset Map */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- cups_utf32_t unichar; /* Character value */
- cups_vbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- DEBUG_printf(("conv_utf8_to_vbcs(dest=%p, src=\"%s\", maxout=%d, "
- "encoding=%d)\n", dest, src, maxout, encoding));
-
- /*
- * Find legacy charset map in cache...
- */
-
- if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
- {
- DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (no charmap)");
-
- return (-1);
- }
-
- /*
- * Convert input UTF-8 to internal UCS-4 (and insert BOM)...
- */
-
- if (cupsUTF8ToUTF32(work, src, CUPS_MAX_USTRING) < 0)
- {
- DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (Unable to convert to UTF-32)");
-
- return (-1);
- }
-
- /*
- * Convert internal UCS-4 to VBCS legacy charset (and delete BOM)...
- */
-
- for (start = dest, workptr = work; *workptr && maxout > 0; maxout --)
- {
- unichar = *workptr++;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (unichar < 0x80)
- {
- *dest++ = (cups_sbcs_t)unichar;
-
- DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n", (unsigned)unichar,
- dest[-1]));
-
- continue;
- }
-
- /*
- * Convert unknown character to visible replacement...
- */
-
- vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
-
- if (vrow)
- vrow += (int)(unichar & 0xff);
-
- if (!vrow || !*vrow)
- legchar = (cups_vbcs_t)'?';
- else
- legchar = (cups_vbcs_t)*vrow;
-
- /*
- * Save n-byte legacy character...
- */
-
- if (legchar > 0xffffff)
- {
- if (maxout < 5)
- {
- DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
-
- return (-1);
- }
-
- *dest++ = (cups_sbcs_t)(legchar >> 24);
- *dest++ = (cups_sbcs_t)(legchar >> 16);
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout -= 3;
-
- DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X %02X\n",
- (unsigned)unichar, dest[-4], dest[-3], dest[-2], dest[-1]));
- }
- else if (legchar > 0xffff)
- {
- if (maxout < 4)
- {
- DEBUG_puts("conv_utf8_to_vbcs: Returning -1 (out of space)");
-
- return (-1);
- }
-
- *dest++ = (cups_sbcs_t)(legchar >> 16);
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout -= 2;
-
- DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X %02X\n",
- (unsigned)unichar, dest[-3], dest[-2], dest[-1]));
- }
- else if (legchar > 0xff)
- {
- *dest++ = (cups_sbcs_t)(legchar >> 8);
- *dest++ = (cups_sbcs_t)legchar;
-
- maxout --;
-
- DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X %02X\n",
- (unsigned)unichar, dest[-2], dest[-1]));
- }
- else
- {
- *dest++ = legchar;
-
- DEBUG_printf(("conv_utf8_to_vbcs: %08x => %02X\n",
- (unsigned)unichar, dest[-1]));
- }
- }
-
- *dest = '\0';
-
- vmap->used --;
-
- DEBUG_printf(("conv_utf8_to_vbcs: Returning %d characters\n",
- (int)(dest - start)));
-
- return ((int)(dest - start));
-}
-
-
-/*
- * 'conv_vbcs_to_utf8()' - Convert legacy DBCS/VBCS to UTF-8.
- */
-
-static int /* O - Count or -1 on error */
-conv_vbcs_to_utf8(
- cups_utf8_t *dest, /* O - Target string */
- const cups_sbcs_t *src, /* I - Source string */
- int maxout, /* I - Max output */
- const cups_encoding_t encoding) /* I - Encoding */
-{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of n-byte legacy char */
- cups_vbcs_t legchar; /* Legacy character value */
- cups_utf32_t work[CUPS_MAX_USTRING], /* Internal UCS-4 string */
- *workptr; /* Pointer into string */
-
-
- /*
- * Find legacy charset map in cache...
- */
-
- DEBUG_printf(("conv_vbcs_to_utf8(dest=%p, src=%p, maxout=%d, encoding=%d)\n",
- dest, src, maxout, encoding));
-
- if ((vmap = (_cups_vmap_t *)get_charmap(encoding)) == NULL)
- {
- DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (NULL vmap)");
-
- return (-1);
- }
-
- /*
- * Convert input legacy charset to internal UCS-4 (and insert BOM)...
- */
-
- work[0] = 0xfeff;
- for (workptr = work + 1; *src && workptr < (work + CUPS_MAX_USTRING - 1);)
- {
- legchar = *src++;
- leadchar = (cups_sbcs_t)legchar;
-
- /*
- * Convert ASCII verbatim (optimization)...
- */
-
- if (legchar < 0x80)
- {
- *workptr++ = (cups_utf32_t)legchar;
-
- DEBUG_printf(("conv_vbcs_to_utf8: %02X => %08X\n", src[-1],
- (unsigned)legchar));
- continue;
- }
-
- /*
- * Convert 2-byte legacy character...
- */
-
- if (vmap->lead2char[(int)leadchar] == leadchar)
- {
- if (!*src)
- {
- DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
-
- /*
- * Convert unknown character to Replacement Character...
- */
-
- crow = vmap->char2uni[(int)((legchar >> 8) & 0xff)];
- if (crow)
- crow += (int) (legchar & 0xff);
-
- if (!crow || !*crow)
- *workptr++ = 0xfffd;
- else
- *workptr++ = (cups_utf32_t)*crow;
-
- DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X => %08X\n",
- src[-2], src[-1], (unsigned)workptr[-1]));
- continue;
- }
-
- /*
- * Fetch 3-byte or 4-byte legacy character...
- */
-
- if (vmap->lead3char[(int)leadchar] == leadchar)
- {
- if (!*src || !src[1])
- {
- DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 2)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- }
- else if (vmap->lead4char[(int)leadchar] == leadchar)
- {
- if (!*src || !src[1] || !src[2])
- {
- DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (short string 3)");
-
- return (-1);
- }
-
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- legchar = (legchar << 8) | *src++;
- }
- else
- {
- DEBUG_puts("conv_vbcs_to_utf8: Returning -1 (bad character)");
-
- return (-1);
- }
-
- /*
- * Find 3-byte or 4-byte legacy character...
- */
-
- wide2uni = (_cups_wide2uni_t *)bsearch(&legchar,
- vmap->wide2uni,
- vmap->widecount,
- sizeof(_cups_wide2uni_t),
- compare_wide);
-
- /*
- * Convert unknown character to Replacement Character...
- */
-
- if (!wide2uni || !wide2uni->unichar)
- *workptr++ = 0xfffd;
- else
- *workptr++ = wide2uni->unichar;
-
- if (vmap->lead3char[(int)leadchar] == leadchar)
- DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X => %08X\n",
- src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
- else
- DEBUG_printf(("conv_vbcs_to_utf8: %02X %02X %02X %02X => %08X\n",
- src[-4], src[-3], src[-2], src[-1], (unsigned)workptr[-1]));
- }
-
- *workptr = 0;
-
- vmap->used --;
-
- DEBUG_printf(("conv_vbcs_to_utf8: Converting %d UTF-32 characters to UTF-8\n",
- (int)(workptr - work)));
-
- /*
- * Convert internal UCS-4 to output UTF-8 (and delete BOM)...
- */
-
- return (cupsUTF32ToUTF8(dest, work, maxout));
-}
-
-
-/*
- * 'free_sbcs_charmap()' - Free memory used by a single byte character set.
- */
-
-static void
-free_sbcs_charmap(_cups_cmap_t *cmap) /* I - Character set */
-{
- int i; /* Looping variable */
-
-
- for (i = 0; i < 256; i ++)
- if (cmap->uni2char[i])
- free(cmap->uni2char[i]);
-
- free(cmap);
-}
-
-
-/*
- * 'free_vbcs_charmap()' - Release all memory held by a variable-byte charset map.
- *
- * Frees the 256 'char2uni' rows, the 256 'uni2char' rows, the 'wide2uni'
- * table, and finally the map structure.  Entries that were never allocated
- * are NULL, which free() ignores.
- */
-
-static void
-free_vbcs_charmap(_cups_vmap_t *vmap)	/* I - Character set to release */
-{
-  int	row;				/* Index into both row tables */
-
-
-  for (row = 0; row < 256; row ++)
-  {
-    free(vmap->char2uni[row]);
-    free(vmap->uni2char[row]);
-  }
-
-  free(vmap->wide2uni);
-  free(vmap);
-}
-
-
-/*
- * 'get_charmap()' - Lookup or get a character set map (private).
- *
- * Builds the path "<datadir>/charmaps/<encoding name>.txt" and passes it to
- * the single-byte loader when 'encoding' is below CUPS_ENCODING_SBCS_END, or
- * to the variable-byte loader when it is below CUPS_ENCODING_VBCS_END.  An
- * encoding that is below neither limit yields NULL.
- *
- * Only single-byte (SBCS), double-byte (DBCS), and variable-byte (VBCS)
- * character sets _without_ charset escapes are handled.  Multiple-byte
- * character sets (MBCS) that switch charsets via escapes, such as
- * ISO-2022-JP, are not.
- */
-
-
-static void *				/* O - Charset map pointer or NULL */
-get_charmap(
-    const cups_encoding_t encoding)	/* I - Encoding */
-{
-  _cups_globals_t	*cg = _cupsGlobals();
-					/* Global data */
-  char			filename[1024];	/* Path of the charset map file */
-  void			*map = NULL;	/* Map handed back to the caller */
-
-
-  DEBUG_printf(("get_charmap(encoding=%d)\n", encoding));
-
-  /*
-   * Compose the map file name from the data directory and encoding name...
-   */
-
-  snprintf(filename, sizeof(filename), "%s/charmaps/%s.txt",
-           cg->cups_datadir, _cupsEncodingName(encoding));
-
-  DEBUG_printf(("get_charmap: filename=\"%s\"\n", filename));
-
-  /*
-   * Pick the loader that matches the encoding class; 'map' stays NULL when
-   * neither class applies...
-   */
-
-  if (encoding < CUPS_ENCODING_SBCS_END)
-    map = get_sbcs_charmap(encoding, filename);
-  else if (encoding < CUPS_ENCODING_VBCS_END)
-    map = get_vbcs_charmap(encoding, filename);
-
-  return (map);
-}
-
-
-/*
- * 'get_charmap_count()' - Count the data lines in a charmap file.
- *
- * Reads from the current position of 'fp' until cupsFileGets() stops
- * returning lines and counts each line whose first character is '0'.  The
- * file position is left wherever reading stopped; no rewind is done here.
- */
-
-static int				/* O - Count or -1 if no lines matched */
-get_charmap_count(cups_file_t *fp)	/* I - File to read from */
-{
-  int	nlines = 0;			/* Lines starting with '0' so far */
-  char	buffer[256];			/* Line from input map file */
-
-
-  while (cupsFileGets(fp, buffer, sizeof(buffer)))
-  {
-    if (buffer[0] != '0')
-      continue;
-
-    nlines ++;
-  }
-
-  return (nlines > 0 ? nlines : -1);
-}
-
-
-/*
- * 'get_sbcs_charmap()' - Get SBCS Charmap.
- *
- * Returns the cached map for 'encoding' when one is already on 'cmap_cache'
- * (incrementing its 'used' count).  Otherwise 'filename' is read, the
- * legacy-to-Unicode and Unicode-to-legacy tables are filled in, and the new
- * map is added to the front of the cache.
- *
- * Only lines whose first character is '0' are parsed.  Each supplies a
- * hexadecimal legacy value, which must not exceed 0xff, followed by a
- * hexadecimal Unicode value, which must not exceed 0x10ffff.  An
- * out-of-range value or a failed allocation discards the partial map and
- * returns NULL, as does a file that cannot be opened.
- */
-
-static _cups_cmap_t *			/* O - Charmap or 0 on error */
-get_sbcs_charmap(
-    const cups_encoding_t encoding,	/* I - Charmap Encoding */
-    const char            *filename)	/* I - Charmap Filename */
-{
-  _cups_cmap_t	*cmap;			/* Legacy SBCS / Unicode Charset Map */
-  cups_file_t	*fp;			/* Charset map file pointer */
-  char		line[256],		/* Line from charset map file */
-		*next;			/* Text following the legacy value */
-  unsigned long	legchar;		/* Legacy character value */
-  cups_utf32_t	unichar;		/* Unicode character value */
-  int		hi,			/* Row index in 'uni2char' */
-		lo;			/* Column index in 'uni2char' row */
-  cups_sbcs_t	*cell;			/* Cell in 'uni2char' row */
-
-
-  DEBUG_printf(("get_sbcs_charmap(encoding=%d, filename=\"%s\")\n", encoding,
-                filename));
-
-  /*
-   * Walk the cache looking for this encoding...
-   */
-
-  cmap = cmap_cache;
-
-  while (cmap && cmap->encoding != encoding)
-    cmap = cmap->next;
-
-  if (cmap)
-  {
-    cmap->used ++;
-    DEBUG_printf(("get_sbcs_charmap: Returning existing cmap=%p\n", cmap));
-
-    return (cmap);
-  }
-
-  /*
-   * Not cached - open the map file...
-   */
-
-  fp = cupsFileOpen(filename, "r");
-
-  if (!fp)
-  {
-    DEBUG_printf(("get_sbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
-
-    return (NULL);
-  }
-
-  /*
-   * Start from a zeroed map so unused table entries read as 0/NULL...
-   */
-
-  cmap = (_cups_cmap_t *)calloc(1, sizeof(_cups_cmap_t));
-
-  if (!cmap)
-  {
-    cupsFileClose(fp);
-    DEBUG_puts("get_sbcs_charmap: Returning NULL (Unable to allocate memory)");
-
-    return (NULL);
-  }
-
-  cmap->used ++;
-  cmap->encoding = encoding;
-
-  /*
-   * Load every data line into the two lookup tables...
-   */
-
-  while (cupsFileGets(fp, line, sizeof(line)))
-  {
-    if (line[0] != '0')
-      continue;
-
-    /*
-     * 'legchar' is unsigned, so one upper-bound test also rejects any
-     * negative strtol() result (it wraps to a huge value)...
-     */
-
-    legchar = strtol(line, &next, 16);
-    if (legchar > 0xff)
-      goto sbcs_error;
-
-    unichar = strtol(next, NULL, 16);
-    if (unichar < 0 || unichar > 0x10ffff)
-      goto sbcs_error;
-
-    /*
-     * Legacy to Unicode: direct lookup, low 16 bits of the code point...
-     */
-
-    cmap->char2uni[legchar] = (cups_ucs2_t)(unichar & 0xffff);
-
-    /*
-     * Unicode to legacy: rows are allocated the first time they are needed...
-     */
-
-    hi = (int)((unichar >> 8) & 0xff);
-    lo = (int)(unichar & 0xff);
-
-    if (!cmap->uni2char[hi])
-    {
-      cmap->uni2char[hi] = (cups_sbcs_t *)calloc(256, sizeof(cups_sbcs_t));
-
-      if (!cmap->uni2char[hi])
-        goto sbcs_error;
-    }
-
-    cell = cmap->uni2char[hi] + lo;
-
-    /*
-     * The first (oldest) legacy character keeps the cell; the Replacement
-     * Character is stored as a visible '?'...
-     */
-
-    if (!*cell)
-      *cell = (cups_sbcs_t)(unichar == 0xfffd ? (unsigned long)'?' : legchar);
-  }
-
-  cupsFileClose(fp);
-
-  /*
-   * Push the finished map onto the front of the cache...
-   */
-
-  cmap->next = cmap_cache;
-  cmap_cache = cmap;
-
-  DEBUG_printf(("get_sbcs_charmap: Returning new cmap=%p\n", cmap));
-
-  return (cmap);
-
-  /*
-   * Error exit: drop the partial map and close the file...
-   */
-
-  sbcs_error:
-
-  free_sbcs_charmap(cmap);
-
-  cupsFileClose(fp);
-
-  DEBUG_puts("get_sbcs_charmap: Returning NULL (Read/format error)");
-
-  return (NULL);
-}
-
-
-/*
- * 'get_vbcs_charmap()' - Get DBCS/VBCS Charmap.
- *
- * Returns the cached map for 'encoding' if one is already on 'vmap_cache'
- * (incrementing its 'used' count). Otherwise the map file is read twice:
- * once by get_charmap_count() to count the data lines and, after a rewind,
- * once more to fill in the lookup tables. The new map is then added to
- * the front of the cache.
- *
- * Legacy values up to 0xffff are stored in the per-lead-byte 'char2uni'
- * rows; larger values are appended to the 'wide2uni' array. Every entry
- * is also recorded in the 'uni2char' reverse table.
- *
- * NULL is returned when the file cannot be opened, contains no data lines,
- * holds an out-of-range value, or an allocation fails.
- */
-
-static _cups_vmap_t * /* O - Charmap or 0 on error */
-get_vbcs_charmap(
- const cups_encoding_t encoding, /* I - Charmap Encoding */
- const char *filename) /* I - Charmap Filename */
-{
- _cups_vmap_t *vmap; /* Legacy VBCS / Unicode Charset Map */
- cups_ucs2_t *crow; /* Pointer to UCS-2 row in 'char2uni' */
- cups_vbcs_t *vrow; /* Pointer to VBCS row in 'uni2char' */
- _cups_wide2uni_t *wide2uni; /* Pointer to row in 'wide2uni' */
- cups_sbcs_t leadchar; /* Lead char of 2-byte legacy char */
- unsigned long legchar; /* Legacy character value */
- cups_utf32_t unichar; /* Unicode character value */
- int mapcount; /* Count of lines in charmap file */
- cups_file_t *fp; /* Charset map file pointer */
- char *s; /* Line parsing pointer */
- char line[256]; /* Line from charset map file */
- int i; /* Loop variable */
- int legacy; /* Non-zero once 'wide2uni' is allocated */
-
-
- DEBUG_printf(("get_vbcs_charmap(encoding=%d, filename=\"%s\")\n",
- encoding, filename));
-
- /*
- * See if we already have this DBCS/VBCS charset map loaded...
- */
-
- for (vmap = vmap_cache; vmap; vmap = vmap->next)
- {
- if (vmap->encoding == encoding)
- {
- vmap->used ++;
- DEBUG_printf(("get_vbcs_charmap: Returning existing vmap=%p\n", vmap));
-
- return ((void *)vmap);
- }
- }
-
- /*
- * Open VBCS charset map input file...
- */
-
- if ((fp = cupsFileOpen(filename, "r")) == NULL)
- {
- DEBUG_printf(("get_vbcs_charmap: Returning NULL (%s)\n", strerror(errno)));
-
- return (NULL);
- }
-
- /*
- * Count lines in charmap file...
- *
- * Only lines starting with '0' are counted; a file without any such
- * line is rejected here.
- */
-
- if ((mapcount = get_charmap_count(fp)) <= 0)
- {
- DEBUG_puts("get_vbcs_charmap: Unable to get charmap count!");
-
- cupsFileClose(fp);
-
- return (NULL);
- }
-
- DEBUG_printf(("get_vbcs_charmap: mapcount=%d\n", mapcount));
-
- /*
- * Allocate memory for DBCS/VBCS charset map...
- *
- * calloc() zeroes the structure, so every row pointer starts out NULL
- * and every lead-byte table entry starts out 0.
- */
-
- if ((vmap = (_cups_vmap_t *)calloc(1, sizeof(_cups_vmap_t))) == NULL)
- {
- DEBUG_puts("get_vbcs_charmap: Unable to allocate memory!");
-
- cupsFileClose(fp);
-
- return (NULL);
- }
-
- vmap->used ++;
- vmap->encoding = encoding;
-
- /*
- * Save DBCS/VBCS charset map into memory for transcoding...
- *
- * The counting pass consumed the file, so rewind before the second pass.
- * 'i' counts the data lines processed so far.
- */
-
- wide2uni = NULL;
-
- cupsFileRewind(fp);
-
- i = 0;
- legacy = 0;
-
- while (cupsFileGets(fp, line, sizeof(line)))
- {
- if (line[0] != '0')
- continue;
-
- /*
- * Parse the legacy value; strtoul() reports overflow as ULONG_MAX...
- */
-
- legchar = strtoul(line, &s, 16);
- if (legchar == ULONG_MAX)
- goto vbcs_error;
-
- /*
- * Parse the Unicode value that follows...
- *
- * NOTE(review): values above 0xffff pass this check, but are later
- * stored through a cups_ucs2_t cast and indexed in 'uni2char' using
- * only bits 0-15 - presumably map files never contain such values;
- * confirm.
- */
-
- unichar = strtol(s, NULL, 16);
- if (unichar < 0 || unichar > 0x10ffff)
- goto vbcs_error;
-
- i ++;
-
-/* DEBUG_printf((" i=%d, legchar=0x%08lx, unichar=0x%04x\n", i,
- legchar, (unsigned)unichar)); */
-
- /*
- * Save lead char of 2/3/4-byte legacy char...
- *
- * Values of 0xff or less fall into the last branch with a lead byte
- * of 0, so only 'lead2char[0]' is written (with 0) for them.
- */
-
- if (legchar > 0xffffff)
- {
- leadchar = (cups_sbcs_t)(legchar >> 24);
- vmap->lead4char[leadchar] = leadchar;
- }
- else if (legchar > 0xffff)
- {
- leadchar = (cups_sbcs_t)(legchar >> 16);
- vmap->lead3char[leadchar] = leadchar;
- }
- else
- {
- leadchar = (cups_sbcs_t)(legchar >> 8);
- vmap->lead2char[leadchar] = leadchar;
- }
-
- /*
- * Save Legacy to Unicode mapping...
- */
-
- if (legchar <= 0xffff)
- {
- /*
- * Save DBCS 16-bit to Unicode mapping in indirect lookup table...
- *
- * Rows are selected by lead byte and allocated on first use; the
- * low byte of the legacy value selects the cell.
- */
-
- crow = vmap->char2uni[(int)leadchar];
- if (!crow)
- {
- crow = (cups_ucs2_t *)calloc(256, sizeof(cups_ucs2_t));
- if (!crow)
- goto vbcs_error;
-
- vmap->char2uni[(int)leadchar] = crow;
- }
-
- crow[(int)(legchar & 0xff)] = (cups_ucs2_t)unichar;
- }
- else
- {
- /*
- * Save VBCS 32-bit to Unicode mapping in sorted list table...
- *
- * On the first wide character the table is sized for every data line
- * from this one to the end of the file (mapcount - i + 1); entries
- * are then appended in file order.
- *
- * NOTE(review): conv_vbcs_to_utf8() runs bsearch() over 'widecount'
- * entries, so this presumably relies on the map file listing all of
- * its wide entries last and in ascending order - confirm against the
- * map files.
- */
-
- if (!legacy)
- {
- legacy = 1;
- vmap->widecount = (mapcount - i + 1);
- wide2uni = (_cups_wide2uni_t *)calloc(vmap->widecount,
- sizeof(_cups_wide2uni_t));
- if (!wide2uni)
- goto vbcs_error;
-
- vmap->wide2uni = wide2uni;
- }
-
- wide2uni->widechar = (cups_vbcs_t)legchar;
- wide2uni->unichar = (cups_ucs2_t)unichar;
- wide2uni ++;
- }
-
- /*
- * Save Unicode to legacy mapping in indirect lookup table...
- *
- * Bits 8-15 of the Unicode value select the row (allocated on first
- * use) and bits 0-7 select the cell within it.
- */
-
- vrow = vmap->uni2char[(int)((unichar >> 8) & 0xff)];
- if (!vrow)
- {
- vrow = (cups_vbcs_t *)calloc(256, sizeof(cups_vbcs_t));
- if (!vrow)
- goto vbcs_error;
-
- vmap->uni2char[(int) ((unichar >> 8) & 0xff)] = vrow;
- }
-
- vrow += (int)(unichar & 0xff);
-
- /*
- * Convert Replacement Character to visible replacement...
- */
-
- if (unichar == 0xfffd)
- legchar = (unsigned long)'?';
-
- /*
- * First (oldest) legacy character uses Unicode mapping cell...
- */
-
- if (!*vrow)
- *vrow = (cups_vbcs_t)legchar;
- }
-
- /*
- * Data lines read minus the size reserved for the wide table...
- */
-
- vmap->charcount = (i - vmap->widecount);
-
- cupsFileClose(fp);
-
- /*
- * Add it to the cache and return...
- */
-
- vmap->next = vmap_cache;
- vmap_cache = vmap;
-
- DEBUG_printf(("get_vbcs_charmap: Returning new vmap=%p\n", vmap));
-
- return (vmap);
-
- /*
- * If we get here, the file contains errors...
- *
- * An allocation failure inside the loop also lands here. The partial
- * map, including any rows already allocated, is released.
- */
-
- vbcs_error:
-
- free_vbcs_charmap(vmap);
-
- cupsFileClose(fp);
-
- DEBUG_puts("get_vbcs_charmap: Returning NULL (Read/format error)");
-
- return (NULL);
-}
-
-
-/*
- * End of "$Id: transcode.c 7560 2008-05-13 06:34:04Z mike $"
- */